Example 1
class MapTilingWithOverlap(MapTiling):
    """ Implements the orthogonal tiling transformation with overlap.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map. The overlap can vary in each
        dimension and direction. It is added to each tile, and the start and
        end points of the outer map are adjusted to account for the overlap.
    """

    # Properties
    lower_overlap = ShapeProperty(dtype=tuple,
                                  default=None,
                                  desc="Lower overlap per dimension")
    upper_overlap = ShapeProperty(dtype=tuple,
                                  default=None,
                                  desc="Upper overlap per dimension")

    def apply(self, sdfg):
        if len(self.lower_overlap) == 0:
            return
        if len(self.upper_overlap) == 0:
            return
        
        graph = sdfg.nodes()[self.state_id]
        map_entry = graph.nodes()[self.subgraph[self.map_entry]]

        # Tile the map
        self.tile_trivial = True
        super().apply(sdfg)
        tile_map_entry = graph.in_edges(map_entry)[0].src
        tile_map_exit = graph.exit_node(tile_map_entry)

        # Introduce overlap        
        for lower_overlap, upper_overlap, param in zip(self.lower_overlap, self.upper_overlap, tile_map_entry.params):
            pystr = pystr_to_symbolic(param)
            lower_replace_dict = {pystr: pystr - lower_overlap}
            upper_replace_dict = {pystr: pystr + upper_overlap}

            # Extend the range of the inner map
            map_entry.range.ranges = [
                (r[0].subs(lower_replace_dict), r[1].subs(upper_replace_dict), r[2])
                for r in map_entry.range.ranges]

            # Fix the memlets
            for edge in graph.out_edges(tile_map_entry) + graph.in_edges(tile_map_exit):
                edge.data.subset.ranges = [
                    (r[0].subs(lower_replace_dict), r[1].subs(upper_replace_dict), r[2])
                    for r in edge.data.subset.ranges]

        # Reduce the range of the tile_map
        tile_map_entry.range.ranges = [(r[0]+lo, r[1]-uo, r[2]) 
            for r,lo,uo in zip(tile_map_entry.range.ranges, self.lower_overlap, self.upper_overlap)]
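A minimal usage sketch, assuming a DaCe version that exposes SDFG.apply_transformations and an existing `sdfg` whose matched map is two-dimensional; all option values below are illustrative:

# Hedged sketch: tile a 2D map with a one-element halo in each direction.
sdfg.apply_transformations(MapTilingWithOverlap,
                           options={'tile_sizes': (32, 32),
                                    'lower_overlap': (1, 1),
                                    'upper_overlap': (1, 1)})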
Example 2
class MapDimInterchange(pm.Transformation):
    """ Implements the map-dimension-interchange pattern.

        Map-dimension-interchange re-orders the dimensions of a map.
    """

    _map_entry = nodes.MapEntry(None)

    order = ShapeProperty()

    @staticmethod
    def expressions():
        return [nxutil.node_path_graph(MapDimInterchange._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ A candidate subgraph matches the map-dimension-interchange 
            transformation when a map has at least two dimensions.
        """
        map_entry = graph.nodes()[candidate[MapDimInterchange._map_entry]]
        return map_entry.map.get_param_num() > 1

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = candidate[MapDimInterchange._map_entry]

        return str(map_entry)

    def apply(self, sdfg):
        """ Reorders the dimensions of the map by reordering the
            parameters and the range of the map as specified through the 
            properties.
        """

        # Extract the map and its entry node.
        graph = sdfg.nodes()[self.state_id]
        map_entry = graph.nodes()[self.subgraph[MapDimInterchange._map_entry]]
        current_map = map_entry.map

        order = self.order
        if len(self.order) != current_map.get_param_num():
            # 'order' must be of the same length as the number of map
            # dimensions.
            return

        # Re-order the map dimensions
        current_map.params = [current_map.params[idx] for idx in order]
        current_map.range.reorder(order)

        return

    def __init__(self, *args, **kwargs):
        self.entry = nodes.EntryNode()
        self.tasklet = nodes.Tasklet('_')
        self.exit = nodes.ExitNode()
        self.pairs = None
        super().__init__(*args, **kwargs)

    def modifies_graph(self):
        return True
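A usage sketch, assuming the same kind of pre-built `sdfg` as above; `order` must be a permutation of the map's dimension indices, otherwise apply() returns without changes:

# Hedged sketch: interchange the two dimensions of every matched 2D map.
sdfg.apply_transformations(MapDimInterchange, options={'order': [1, 0]})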
Example 3
class MapDimShuffle(transformation.Transformation):
    """ Implements the map-dim shuffle transformation.
    
        MapDimShuffle takes a map and a list of params.
        It reorders the dimensions in the map such that it matches the list.
    """

    _map_entry = transformation.PatternNode(nodes.MapEntry)

    # Properties
    parameters = ShapeProperty(dtype=list,
                               default=None,
                               desc="Desired order of map parameters")

    @staticmethod
    def expressions():
        return [sdutil.node_path_graph(MapDimShuffle._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, permissive=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[MapDimShuffle._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg: SDFG):
        graph = sdfg.nodes()[self.state_id]
        map_entry = graph.nodes()[self.subgraph[self._map_entry]]

        if set(self.parameters) != set(map_entry.map.params):
            return

        map_entry.range.ranges = [
            r for list_param in self.parameters for map_param, r in zip(
                map_entry.map.params, map_entry.range.ranges)
            if list_param == map_param
        ]
        map_entry.map.params = self.parameters
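Unlike MapDimInterchange, this transformation is driven by parameter names rather than indices. A sketch, assuming the matched map was declared with parameters 'i' and 'j':

# Hedged sketch: make 'j' the outermost map parameter.
sdfg.apply_transformations(MapDimShuffle, options={'parameters': ['j', 'i']})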
Example 4
class MapTiling(pattern_matching.Transformation):
    """ Implements the orthogonal tiling transformation.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map.
    """

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    # Properties
    prefix = Property(dtype=str,
                      default="tile",
                      desc="Prefix for new range symbols")
    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")
    strides = ShapeProperty(
        dtype=tuple,
        default=tuple(),
        desc="Tile stride (enables overlapping tiles). If empty, matches tile")
    divides_evenly = Property(dtype=bool,
                              default=False,
                              desc="Tile size divides dimension length evenly")

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        return [nxutil.node_path_graph(MapTiling._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[MapTiling._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]

        tile_strides = self.tile_sizes
        if self.strides is not None and len(self.strides) == len(tile_strides):
            tile_strides = self.strides

        # Retrieve map entry and exit nodes.
        map_entry = graph.nodes()[self.subgraph[MapTiling._map_entry]]
        from dace.transformation.dataflow.map_collapse import MapCollapse
        from dace.transformation.dataflow.strip_mining import StripMining
        stripmine_subgraph = {
            StripMining._map_entry: self.subgraph[MapTiling._map_entry]
        }
        sdfg_id = sdfg.sdfg_list.index(sdfg)
        last_map_entry = None
        removed_maps = 0

        original_schedule = map_entry.schedule

        for dim_idx in range(len(map_entry.map.params)):
            if dim_idx >= len(self.tile_sizes):
                tile_size = symbolic.pystr_to_symbolic(self.tile_sizes[-1])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[-1])
            else:
                tile_size = symbolic.pystr_to_symbolic(
                    self.tile_sizes[dim_idx])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[dim_idx])

            dim_idx -= removed_maps
            # If tile size is trivial, skip strip-mining map dimension
            if tile_size == map_entry.map.range.size()[dim_idx]:
                continue

            stripmine = StripMining(sdfg_id, self.state_id, stripmine_subgraph,
                                    self.expr_index)

            # Special case: Tile size of 1 should be omitted from inner map
            if tile_size == 1 and tile_stride == 1:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = ''
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = True
                stripmine.apply(sdfg)
                removed_maps += 1
            else:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = self.prefix
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = self.divides_evenly
                stripmine.apply(sdfg)

            # Apply the original map's schedule to the new map
            map_entry.schedule = original_schedule

            if last_map_entry:
                new_map_entry = graph.in_edges(map_entry)[0].src
                mapcollapse_subgraph = {
                    MapCollapse._outer_map_entry:
                    graph.node_id(last_map_entry),
                    MapCollapse._inner_map_entry: graph.node_id(new_map_entry)
                }
                mapcollapse = MapCollapse(sdfg_id, self.state_id,
                                          mapcollapse_subgraph, 0)
                mapcollapse.apply(sdfg)
            last_map_entry = graph.in_edges(map_entry)[0].src
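A usage sketch for this variant; as the loop above shows, when tile_sizes is shorter than the map's dimensionality the last entry is reused for the remaining dimensions:

# Hedged sketch: 64x64 tiles; strides default to the tile sizes.
sdfg.apply_transformations(MapTiling, options={'tile_sizes': (64, 64)})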
Example 5
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = EnumProperty(dtype=dtypes.StorageType,
                           desc="Storage location",
                           default=dtypes.StorageType.Default)
    lifetime = EnumProperty(dtype=dtypes.AllocationLifetime,
                            desc='Data allocation span',
                            default=dtypes.AllocationLifetime.Scope)
    location = DictProperty(
        key_type=str,
        value_type=symbolic.pystr_to_symbolic,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location if location is not None else {}
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @property
    def toplevel(self):
        return self.lifetime is not dtypes.AllocationLifetime.Scope

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns a set of undefined symbols in this data descriptor. """
        result = set()
        for s in self.shape:
            if isinstance(s, sp.Basic):
                result |= set(s.free_symbols)
        return result

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        return self.dtype.veclen if hasattr(self.dtype, "veclen") else 1

    @property
    def ctype(self):
        return self.dtype.ctype
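Since is_equivalent and as_arg raise NotImplementedError, Data is effectively abstract and concrete containers subclass it. A minimal hypothetical subclass, purely to illustrate the contract (the class name is invented, not part of DaCe):

class IllustrativeScalar(Data):
    """ Hypothetical scalar descriptor (illustration of the Data contract). """
    def __init__(self, dtype):
        # Argument order follows Data.__init__ above.
        super().__init__(dtype, [1], False, dtypes.StorageType.Default, None,
                         dtypes.AllocationLifetime.Scope, None)

    def is_equivalent(self, other):
        return isinstance(other, IllustrativeScalar) and self.dtype == other.dtype

    def as_arg(self, with_types=True, for_call=False, name=None):
        # E.g., 'double x' in a signature, plain 'x' at the call site.
        if not with_types or for_call:
            return name
        return '%s %s' % (self.dtype.ctype, name)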
Example 6
class Array(Data):
    """ Array/constant descriptor (dimensions, type and other properties). """

    # Properties
    allow_conflicts = Property(
        dtype=bool,
        default=False,
        desc='If enabled, allows more than one '
        'memlet to write to the same memory location without conflict '
        'resolution.')

    strides = ShapeProperty(
        # element_type=symbolic.pystr_to_symbolic,
        desc='For each dimension, the number of elements to '
        'skip in order to obtain the next element in '
        'that dimension.')

    total_size = SymbolicProperty(
        default=1,
        desc='The total allocated size of the array. Can be used for'
        ' padding.')

    offset = ListProperty(element_type=symbolic.pystr_to_symbolic,
                          desc='Initial offset to translate all indices by.')

    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in '
                         'the same function')

    alignment = Property(dtype=int,
                         default=0,
                         desc='Allocation alignment in bytes (0 uses '
                         'compiler-default)')

    def __init__(self,
                 dtype,
                 shape,
                 transient=False,
                 allow_conflicts=False,
                 storage=dtypes.StorageType.Default,
                 location=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 alignment=0,
                 debuginfo=None,
                 total_size=None):

        super(Array, self).__init__(dtype, shape, transient, storage, location,
                                    lifetime, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.may_alias = may_alias
        self.alignment = alignment

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = [_prod(shape[i + 1:]) for i in range(len(shape))]

        self.total_size = total_size or _prod(shape)

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return '%s (dtype=%s, shape=%s)' % (type(self).__name__, self.dtype,
                                            self.shape)

    def clone(self):
        return type(self)(self.dtype, self.shape, self.transient,
                          self.allow_conflicts, self.storage, self.location,
                          self.strides, self.offset, self.may_alias,
                          self.lifetime, self.alignment, self.debuginfo,
                          self.total_size)

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        # Take care of symbolic expressions
        attrs['strides'] = list(map(str, attrs['strides']))

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls(dtypes.int8, ())
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # TODO: This needs to be reworked (i.e. integrated into the list property)
        ret.strides = list(map(symbolic.pystr_to_symbolic, ret.strides))

        # Check validity now
        ret.validate()
        return ret

    def validate(self):
        super(Array, self).validate()
        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer '
                            'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sp.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sp.Symbol(str(s), **s.assumptions0)
                else:
                    s = sp.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sp.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sp.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sp.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, type(self)):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # Any other case (constant vs. constant), check for equality
            if otherdim != dim:
                return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        arrname = name

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]

    @property
    def free_symbols(self):
        result = super().free_symbols
        for s in self.strides:
            if isinstance(s, sp.Expr):
                result |= set(s.free_symbols)
        if isinstance(self.total_size, sp.Expr):
            result |= set(self.total_size.free_symbols)
        for o in self.offset:
            if isinstance(o, sp.Expr):
                result |= set(o.free_symbols)

        return result
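An illustrative construction using the module's own symbolic and dtypes helpers; it builds a column-major (FORTRAN-style) N x M descriptor, matching the stride convention documented above:

# Hedged sketch: symbolic column-major array; N and M are DaCe symbols.
N, M = symbolic.symbol('N'), symbolic.symbol('M')
desc = Array(dtypes.float64, (N, M), strides=(1, N), total_size=N * M)
assert desc.free_symbols == {N, M}  # shape, strides, and size contribute symbols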
Example 7
class OrthogonalTiling(pattern_matching.Transformation):
    """ Implements the orthogonal tiling transformation.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map.
    """

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    # Properties
    prefix = Property(
        dtype=str, default="tile", desc="Prefix for new iterators")
    tile_sizes = ShapeProperty(
        dtype=tuple, default=(128, 128, 128), desc="Tile size per dimension")
    divides_evenly = Property(
        dtype=bool,
        default=False,
        desc="Tile size divides dimension length evenly")

    @staticmethod
    def annotates_memlets():
        return False

    @staticmethod
    def expressions():
        return [nxutil.node_path_graph(OrthogonalTiling._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        # Tile map.
        target_dim, new_dim, new_map = self.__stripmine(
            sdfg, graph, self.subgraph)
        return new_map

    def __stripmine(self, sdfg, graph, candidate):
        # Retrieve map entry and exit nodes.
        map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]]
        map_exit = graph.exit_nodes(map_entry)[0]

        # Map subgraph
        map_subgraph = graph.scope_subgraph(map_entry)

        # Retrieve transformation properties.
        prefix = self.prefix
        tile_sizes = self.tile_sizes
        divides_evenly = self.divides_evenly

        new_param = []
        new_range = []

        for dim_idx in range(len(map_entry.map.params)):

            if dim_idx >= len(tile_sizes):
                tile_size = tile_sizes[-1]
            else:
                tile_size = tile_sizes[dim_idx]

            # Retrieve parameter and range of dimension to be strip-mined.
            target_dim = map_entry.map.params[dim_idx]
            td_from, td_to, td_step = map_entry.map.range[dim_idx]

            new_dim = prefix + '_' + target_dim

            # Basic values
            if divides_evenly:
                tile_num = '(%s + 1 - %s) / %s' % (symbolic.symstr(td_to),
                                                   symbolic.symstr(td_from),
                                                   str(tile_size))
            else:
                tile_num = 'int_ceil((%s + 1 - %s), %s)' % (symbolic.symstr(
                    td_to), symbolic.symstr(td_from), str(tile_size))

            # Outer map values (over all tiles)
            nd_from = 0
            nd_to = symbolic.pystr_to_symbolic(str(tile_num) + ' - 1')
            nd_step = 1

            # Inner map values (over one tile)
            td_from_new = dace.symbolic.pystr_to_symbolic(td_from)
            td_to_new_exact = symbolic.pystr_to_symbolic(
                'min(%s + 1 - %s * %s, %s + %s) - 1' %
                (symbolic.symstr(td_to), str(new_dim), str(tile_size),
                 td_from_new, str(tile_size)))
            td_to_new_approx = symbolic.pystr_to_symbolic(
                '%s + %s - 1' % (td_from_new, str(tile_size)))

            # Outer map (over all tiles)
            new_dim_range = (nd_from, nd_to, nd_step)
            new_param.append(new_dim)
            new_range.append(new_dim_range)

            # Inner map (over one tile)
            if divides_evenly:
                td_to_new = td_to_new_approx
            else:
                td_to_new = dace.symbolic.SymExpr(td_to_new_exact,
                                                  td_to_new_approx)
            map_entry.map.range[dim_idx] = (td_from_new, td_to_new, td_step)

            # Fix subgraph memlets
            target_dim = dace.symbolic.pystr_to_symbolic(target_dim)
            offset = dace.symbolic.pystr_to_symbolic(
                '%s * %s' % (new_dim, str(tile_size)))
            for _, _, _, _, memlet in map_subgraph.edges():
                old_subset = memlet.subset
                if isinstance(old_subset, dace.subsets.Indices):
                    new_indices = []
                    for idx in old_subset:
                        new_idx = idx.subs(target_dim, target_dim + offset)
                        new_indices.append(new_idx)
                    memlet.subset = dace.subsets.Indices(new_indices)
                elif isinstance(old_subset, dace.subsets.Range):
                    new_ranges = []
                    for i, old_range in enumerate(old_subset):
                        if len(old_range) == 3:
                            b, e, s = old_range
                            t = old_subset.tile_sizes[i]
                        else:
                            raise ValueError(
                                'Range %s is invalid.' % old_range)
                        new_b = b.subs(target_dim, target_dim + offset)
                        new_e = e.subs(target_dim, target_dim + offset)
                        new_s = s.subs(target_dim, target_dim + offset)
                        new_t = t.subs(target_dim, target_dim + offset)
                        new_ranges.append((new_b, new_e, new_s, new_t))
                    memlet.subset = dace.subsets.Range(new_ranges)
                else:
                    raise NotImplementedError

        new_map = nodes.Map(prefix + '_' + map_entry.map.label, new_param,
                            subsets.Range(new_range))
        new_map_entry = nodes.MapEntry(new_map)
        new_exit = nodes.MapExit(new_map)

        # Make internal map's schedule to "not parallel"
        map_entry.map._schedule = dtypes.ScheduleType.Default

        # Redirect/create edges.
        new_in_edges = {}
        for _src, conn, _dest, _, memlet in graph.out_edges(map_entry):
            if not isinstance(sdfg.arrays[memlet.data], dace.data.Scalar):
                new_subset = copy.deepcopy(memlet.subset)
                # new_subset = calc_set_image(map_entry.map.params,
                #                             map_entry.map.range, memlet.subset,
                #                             cont_or_strided)
                if memlet.data in new_in_edges:
                    src, src_conn, dest, dest_conn, new_memlet, num = \
                        new_in_edges[memlet.data]
                    new_memlet.subset = calc_set_union(
                        new_memlet.data, sdfg.arrays[new_memlet.data],
                        new_memlet.subset, new_subset)
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges.update({
                        memlet.data: (src, src_conn, dest, dest_conn,
                                      new_memlet, min(num, int(conn[4:])))
                    })
                else:
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges.update({
                        memlet.data: (new_map_entry, None, map_entry, None,
                                      new_memlet, int(conn[4:]))
                    })
        nxutil.change_edge_dest(graph, map_entry, new_map_entry)

        new_out_edges = {}
        for _src, conn, _dest, _, memlet in graph.in_edges(map_exit):
            if not isinstance(sdfg.arrays[memlet.data], dace.data.Scalar):
                new_subset = memlet.subset
                # new_subset = calc_set_image(map_entry.map.params,
                #                             map_entry.map.range,
                #                             memlet.subset, cont_or_strided)
                if memlet.data in new_out_edges:
                    src, src_conn, dest, dest_conn, new_memlet, num = \
                        new_out_edges[memlet.data]
                    new_memlet.subset = calc_set_union(
                        new_memlet.data, sdfg.arrays[new_memlet.data],
                        new_memlet.subset, new_subset)
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges.update({
                        memlet.data: (src, src_conn, dest, dest_conn,
                                      new_memlet, min(num, int(conn[4:])))
                    })
                else:
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges.update({
                        memlet.data: (map_exit, None, new_exit, None,
                                      new_memlet, int(conn[4:]))
                    })
        nxutil.change_edge_src(graph, map_exit, new_exit)

        # Connector related work follows
        # 1. Dictionary 'old_connector_number': 'new_connector_number'
        # 2. New node in/out connectors
        # 3. New edges

        in_conn_nums = []
        for _, e in new_in_edges.items():
            _, _, _, _, _, num = e
            in_conn_nums.append(num)
        in_conn = {}
        for i, num in enumerate(in_conn_nums):
            in_conn.update({num: i + 1})

        entry_in_connectors = set()
        entry_out_connectors = set()
        for i in range(len(in_conn_nums)):
            entry_in_connectors.add('IN_' + str(i + 1))
            entry_out_connectors.add('OUT_' + str(i + 1))
        new_map_entry.in_connectors = entry_in_connectors
        new_map_entry.out_connectors = entry_out_connectors

        for _, e in new_in_edges.items():
            src, _, dst, _, memlet, num = e
            graph.add_edge(src, 'OUT_' + str(in_conn[num]), dst,
                           'IN_' + str(in_conn[num]), memlet)

        out_conn_nums = []
        for _, e in new_out_edges.items():
            _, _, dst, _, _, num = e
            if dst is not new_exit:
                continue
            out_conn_nums.append(num)
        out_conn = {}
        for i, num in enumerate(out_conn_nums):
            out_conn.update({num: i + 1})

        exit_in_connectors = set()
        exit_out_connectors = set()
        for i in range(len(out_conn_nums)):
            exit_in_connectors.add('IN_' + str(i + 1))
            exit_out_connectors.add('OUT_' + str(i + 1))
        new_exit.in_connectors = exit_in_connectors
        new_exit.out_connectors = exit_out_connectors

        for _, e in new_out_edges.items():
            src, _, dst, _, memlet, num = e
            graph.add_edge(src, 'OUT_' + str(out_conn[num]), dst,
                           'IN_' + str(out_conn[num]), memlet)

        # Return strip-mined dimension.
        return target_dim, new_dim, new_map

    @staticmethod
    def __modify_edges(sdfg, graph, candidate, target_dim, new_dim):
        map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]]

        processed = []
        for src, _dest, memlet, _scope in nxutil.traverse_sdfg_scope(
                graph, map_entry, True):
            if memlet in processed:
                continue
            processed.append(memlet)

            # Corner cases
            if isinstance(sdfg.arrays[memlet.data], dace.data.Stream):
                continue
            if memlet.wcr is not None:
                memlet.num_accesses = 1
                continue

            for i, dim in enumerate(memlet.subset):
                if isinstance(dim, tuple):
                    dim = tuple(
                        symbolic.pystr_to_symbolic(d).subs(
                            symbolic.pystr_to_symbolic(target_dim),
                            symbolic.pystr_to_symbolic(
                                '%s + %s' % (str(new_dim), str(target_dim))))
                        for d in dim)
                else:
                    dim = symbolic.pystr_to_symbolic(dim).subs(
                        symbolic.pystr_to_symbolic(target_dim),
                        symbolic.pystr_to_symbolic(
                            '%s + %s' % (str(new_dim), str(target_dim))))

                memlet.subset[i] = dim
        return
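A usage sketch for this older variant, assuming the surrounding DaCe version exposes the same apply_transformations entry point as the newer APIs:

# Hedged sketch: illustrative tile size; extra dimensions reuse the last entry.
sdfg.apply_transformations(OrthogonalTiling, options={'tile_sizes': (64,)})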
Example 8
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        expr_index = self.expr_index
        graph = sdfg.nodes()[self.state_id]
        tasklet = gnode(MapReduceFusion._tasklet)
        tmap_exit = graph.nodes()[self.subgraph[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[self.subgraph[MapReduceFusion._in_array]]
        if expr_index == 0:  # Reduce without outer map
            rmap_entry = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_in_entry]]
        elif expr_index == 1:  # Reduce with outer map
            rmap_out_entry = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_out_entry]]
            rmap_out_exit = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_out_exit]]
            rmap_in_entry = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_in_entry]]
            rmap_tasklet = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_in_tasklet]]

        if expr_index == 2:
            rmap_cr = graph.nodes()[self.subgraph[MapReduceFusion._reduce]]
        else:
            rmap_cr = graph.nodes()[self.subgraph[MapReduceFusion._rmap_in_cr]]
        out_array = gnode(MapReduceFusion._out_array)

        # Set nodes to remove according to the expression index
        nodes_to_remove = [in_array]
        if expr_index == 0:
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_in_entry))
        elif expr_index == 1:
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_out_entry))
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_in_entry))
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_out_exit))
        else:
            nodes_to_remove.append(gnode(MapReduceFusion._reduce))

        # If no other edges lead to mapexit, remove it. Otherwise, keep
        # it and remove reduction incoming/outgoing edges
        if expr_index != 2 and len(graph.in_edges(tmap_exit)) == 1:
            nodes_to_remove.append(tmap_exit)

        memlet_edge = None
        for edge in graph.in_edges(tmap_exit):
            if edge.data.data == in_array.data:
                memlet_edge = edge
                break
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        if expr_index == 0:  # Reduce without outer map
            # Index order does not matter, merge as-is
            pass
        elif expr_index == 1:  # Reduce with outer map
            tmap = tmap_exit.map
            perm_outer, perm_inner = MapReduceFusion.find_permutation(
                tmap, rmap_out_entry.map, rmap_in_entry.map, memlet_edge.data)

            # Split tasklet map into tmap_out -> tmap_in (according to
            # reduction)
            omap = nodes.Map(
                tmap.label + '_nonreduce',
                [p for i, p in enumerate(tmap.params) if i in perm_outer],
                [r for i, r in enumerate(tmap.range) if i in perm_outer],
                tmap.schedule, tmap.unroll, tmap.is_async)
            tmap.params = [
                p for i, p in enumerate(tmap.params) if i in perm_inner
            ]
            tmap.range = [
                r for i, r in enumerate(tmap.range) if i in perm_inner
            ]
            omap_entry = nodes.MapEntry(omap)
            omap_exit = rmap_out_exit
            rmap_out_exit.map = omap

            # Reconnect graph to new map
            tmap_entry = graph.entry_node(tmap_exit)
            tmap_in_edges = list(graph.in_edges(tmap_entry))
            nxutil.change_edge_dest(graph, tmap_entry, omap_entry)
            for e in tmap_in_edges:
                graph.add_edge(omap_entry, e.src_conn, tmap_entry, e.dst_conn,
                               copy.copy(e.data))
        elif expr_index == 2:  # Reduce node
            # Find correspondence between map indices and array outputs
            tmap = tmap_exit.map
            perm = MapReduceFusion.find_permutation_reduce(
                tmap, rmap_cr, graph, memlet_edge.data)

            output_subset = [tmap.params[d] for d in perm]
            if len(output_subset) == 0:  # Output is a scalar
                output_subset = [0]

            array_edge = graph.out_edges(rmap_cr)[0]

            # Delete relevant edges and nodes
            graph.remove_edge(memlet_edge)
            graph.remove_nodes_from(nodes_to_remove)

            # Add new edges and nodes
            #   From tasklet to map exit
            graph.add_edge(
                memlet_edge.src, memlet_edge.src_conn, memlet_edge.dst,
                memlet_edge.dst_conn,
                Memlet(out_array.data, memlet_edge.data.num_accesses,
                       subsets.Indices(output_subset), memlet_edge.data.veclen,
                       rmap_cr.wcr, rmap_cr.identity))

            #   From map exit to output array
            graph.add_edge(
                memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:],
                array_edge.dst, array_edge.dst_conn,
                Memlet(array_edge.data.data, array_edge.data.num_accesses,
                       array_edge.data.subset, array_edge.data.veclen,
                       rmap_cr.wcr, rmap_cr.identity))

            return

        # Remove tmp array node prior to the others, so that a new one
        # can be created in its stead (see below)
        graph.remove_node(nodes_to_remove[0])
        nodes_to_remove = nodes_to_remove[1:]

        # Create tasklet -> tmp -> tasklet connection
        tmp = graph.add_array(
            'tmp',
            memlet_edge.data.subset.bounding_box_size(),
            sdfg.arrays[memlet_edge.data.data].dtype,
            transient=True)
        tasklet_tmp_memlet = copy.deepcopy(memlet_edge.data)
        tasklet_tmp_memlet.data = tmp.data
        tasklet_tmp_memlet.subset = ShapeProperty.to_string(tmp.shape)

        # Modify memlet to point to output array
        memlet_edge.data.data = out_array.data

        # Recover reduction axes from CR reduce subset
        reduce_cr_subset = graph.in_edges(rmap_tasklet)[0].data.subset
        reduce_axes = []
        for ind, crvar in enumerate(reduce_cr_subset.indices):
            if '__i' in str(crvar):
                reduce_axes.append(ind)

        # Modify memlet access index by filtering out reduction axes
        newindices = []
        for ind, ovar in enumerate(memlet_edge.data.subset.indices):
            if ind not in reduce_axes:
                newindices.append(ovar)
        if len(newindices) == 0:
            newindices = [0]

        memlet_edge.data.subset = subsets.Indices(newindices)

        graph.remove_edge(memlet_edge)

        graph.add_edge(memlet_edge.src, memlet_edge.src_conn, tmp,
                       memlet_edge.dst_conn, tasklet_tmp_memlet)

        red_edges = list(graph.in_edges(rmap_tasklet))
        if len(red_edges) != 1:
            raise RuntimeError('CR edge must be unique')

        tmp_tasklet_memlet = copy.deepcopy(tasklet_tmp_memlet)
        graph.add_edge(tmp, None, rmap_tasklet, red_edges[0].dst_conn,
                       tmp_tasklet_memlet)

        for e in graph.edges_between(rmap_tasklet, rmap_cr):
            e.data.subset = memlet_edge.data.subset

        # Move output edges to point directly to CR node
        if expr_index == 1:
            # Set output memlet between CR node and outer reduction map to
            # contain the same subset as the one pointing to the CR node
            for e in graph.out_edges(rmap_cr):
                e.data.subset = memlet_edge.data.subset

            rmap_out = gnode(MapReduceFusion._rmap_out_exit)
            nxutil.change_edge_src(graph, rmap_out, omap_exit)

        # Remove nodes
        graph.remove_nodes_from(nodes_to_remove)

        # For unrelated outputs, connect original output to rmap_out
        if expr_index == 1 and tmap_exit not in nodes_to_remove:
            other_out_edges = list(graph.out_edges(tmap_exit))
            for e in other_out_edges:
                graph.remove_edge(e)
                graph.add_edge(e.src, e.src_conn, omap_exit, None, e.data)
                graph.add_edge(omap_exit, None, e.dst, e.dst_conn,
                               copy.copy(e.data))
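Only the apply method is shown above; the pattern nodes (_tasklet, _tmap_exit, _in_array, ...) and helpers such as find_permutation belong to the enclosing MapReduceFusion class. A hedged invocation sketch:

# Hedged sketch: fuse map+reduce pairs wherever the pattern matches.
sdfg.apply_transformations_repeated(MapReduceFusion)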
Example 9
class SubArray(object):
    """
    Sub-arrays describe subsets of Arrays (see `dace::data::Array`) for purposes of distributed communication. They are
    implemented with [MPI_Type_create_subarray](https://www.mpich.org/static/docs/v3.2/www3/MPI_Type_create_subarray.html).
    Sub-arrays can also be used for collective scatter/gather communication in a process-grid.

    The `shape`, `subshape`, and `dtype` properties correspond to the `array_of_sizes`, `array_of_subsizes`, and
    `oldtype` parameters of `MPI_Type_create_subarray`.

    The following properties are used for collective scatter/gather communication in a process-grid:

    The `pgrid` property is the name of the process-grid where the data will be distributed. The `correspondence`
    property matches the array's dimensions to the process-grid's dimensions. For example, if one wants to distribute
    a matrix to a 2D process-grid, but tile the matrix rows over the grid's columns, then `correspondence = [1, 0]`.
    """

    name = Property(dtype=str, desc="The type's name.")
    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[], desc="The array's shape.")
    subshape = ShapeProperty(default=[], desc="The sub-array's shape.")
    pgrid = Property(
        dtype=str,
        allow_none=True,
        default=None,
        desc="Name of the process-grid where the data are distributed.")
    correspondence = ListProperty(
        int,
        allow_none=True,
        default=None,
        desc="Correspondence of the array's indices to the process grid's "
        "indices.")

    def __init__(self,
                 name: str,
                 dtype: dtypes.typeclass,
                 shape: ShapeType,
                 subshape: ShapeType,
                 pgrid: str = None,
                 correspondence: Sequence[Integral] = None):
        self.name = name
        self.dtype = dtype
        self.shape = shape
        self.subshape = subshape
        self.pgrid = pgrid
        self.correspondence = correspondence or list(range(len(shape)))
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (Integral, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError(
                'Shape must be a list or tuple of integer values or symbols')
        if any(not isinstance(s, (Integral, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic))
               for s in self.subshape):
            raise TypeError(
                'Sub-shape must be a list or tuple of integer values or symbols'
            )
        if any(not isinstance(i, Integral) for i in self.correspondence):
            raise TypeError(
                'Correspondence must be a list or tuple of integer values')
        if len(self.shape) != len(self.subshape):
            raise ValueError(
                'The dimensionality of the shape and sub-shape must match')
        if len(self.correspondence) != len(self.shape):
            raise ValueError(
                'The dimensionality of the shape and correspondence list must match'
            )
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)
        retdict = {"type": type(self).__name__, "attributes": attrs}
        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls('tmp', dtypes.int8, [], [], 'tmp', [])
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # Check validity now
        ret.validate()
        return ret

    def init_code(self):
        """ Outputs MPI allocation/initialization code for the sub-array MPI datatype ONLY if the process-grid is set.
            It is assumed that the following variables exist in the SDFG program's state:
            - MPI_Datatype {self.name}
            - int* {self.name}_counts
            - int* {self.name}_displs

            These variables are typically added to the program's state through a Tasklet, e.g., the Dummy MPI node (for
            more details, check the DaCe MPI library in `dace/libraries/mpi`).
        """
        from dace.libraries.mpi import utils
        if self.pgrid:
            return f"""
                if (__state->{self.pgrid}_valid) {{
                    int sizes[{len(self.shape)}] = {{{', '.join([str(s) for s in self.shape])}}};
                    int subsizes[{len(self.shape)}] = {{{', '.join([str(s) for s in self.subshape])}}};
                    int corr[{len(self.shape)}] = {{{', '.join([str(i) for i in self.correspondence])}}};

                    int basic_stride = subsizes[{len(self.shape)} - 1];

                    int process_strides[{len(self.shape)}];
                    int block_strides[{len(self.shape)}];
                    int data_strides[{len(self.shape)}];

                    process_strides[{len(self.shape)} - 1] = 1;
                    block_strides[{len(self.shape)} - 1] = subsizes[{len(self.shape)} - 1];
                    data_strides[{len(self.shape)} - 1] = 1;

                    for (auto i = {len(self.shape)} - 2; i >= 0; --i) {{
                        block_strides[i] = block_strides[i+1] * subsizes[i];
                        process_strides[i] = process_strides[i+1] * __state->{self.pgrid}_dims[corr[i+1]];
                        data_strides[i] = block_strides[i] * process_strides[i] / basic_stride;
                    }}

                    MPI_Datatype type;
                    int origin[{len(self.shape)}] = {{{','.join(['0'] * len(self.shape))}}};
                    MPI_Type_create_subarray({len(self.shape)}, sizes, subsizes, origin, MPI_ORDER_C, {utils.MPI_DDT(self.dtype.base_type)}, &type);
                    MPI_Type_create_resized(type, 0, basic_stride*sizeof({self.dtype.ctype}), &__state->{self.name});
                    MPI_Type_commit(&__state->{self.name});

                    __state->{self.name}_counts = new int[__state->{self.pgrid}_size];
                    __state->{self.name}_displs = new int[__state->{self.pgrid}_size];
                    int block_id[{len(self.shape)}] = {{0}};
                    int displ = 0;
                    for (auto i = 0; i < __state->{self.pgrid}_size; ++i) {{
                        __state->{self.name}_counts[i] = 1;
                        __state->{self.name}_displs[i] = displ;
                        int idx = {len(self.shape)} - 1;
                        while (idx >= 0 && block_id[idx] + 1 >= __state->{self.pgrid}_dims[corr[idx]]) {{
                            block_id[idx] = 0;
                            displ -= data_strides[idx] * (__state->{self.pgrid}_dims[corr[idx]] - 1);
                            idx--;
                        }}
                        if (idx >= 0) {{ 
                            block_id[idx] += 1;
                            displ += data_strides[idx];
                        }} else {{
                            assert(i == __state->{self.pgrid}_size - 1);
                        }}
                    }}
                }}
            """
        else:
            return ""

    def exit_code(self):
        """ Outputs MPI deallocation code for the sub-array MPI datatype ONLY if the process-grid is set. """
        if self.pgrid:
            return f"""
                if (__state->{self.pgrid}_valid) {{
                    delete[] __state->{self.name}_counts;
                    delete[] __state->{self.name}_displs;
                    MPI_Type_free(&__state->{self.name});
                }}
            """
        else:
            return ""
Example 10
class MapTiling(transformation.SingleStateTransformation):
    """ Implements the orthogonal tiling transformation.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map.
    """
    map_entry = transformation.PatternNode(nodes.MapEntry)

    # Properties
    prefix = Property(dtype=str,
                      default="tile",
                      desc="Prefix for new range symbols")
    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")

    strides = ShapeProperty(
        dtype=tuple,
        default=tuple(),
        desc="Tile stride (enables overlapping tiles). If empty, matches tile")

    tile_offset = ShapeProperty(dtype=tuple,
                                default=None,
                                desc="Negative Stride offset per dimension",
                                allow_none=True)

    divides_evenly = Property(dtype=bool,
                              default=False,
                              desc="Tile size divides dimension length evenly")
    tile_trivial = Property(dtype=bool,
                            default=False,
                            desc="Tiles even if tile_size is 1")

    @staticmethod
    def annotates_memlets():
        return True

    @classmethod
    def expressions(cls):
        return [sdutil.node_path_graph(cls.map_entry)]

    def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
        return True

    def apply(self, graph: SDFGState, sdfg: SDFG):
        tile_strides = self.tile_sizes
        if self.strides is not None and len(self.strides) == len(tile_strides):
            tile_strides = self.strides

        # Retrieve map entry and exit nodes.
        map_entry = self.map_entry
        from dace.transformation.dataflow.map_collapse import MapCollapse
        from dace.transformation.dataflow.strip_mining import StripMining
        stripmine_subgraph = {
            StripMining.map_entry: self.subgraph[MapTiling.map_entry]
        }
        sdfg_id = sdfg.sdfg_id
        last_map_entry = None
        removed_maps = 0

        original_schedule = map_entry.schedule

        for dim_idx in range(len(map_entry.map.params)):
            if dim_idx >= len(self.tile_sizes):
                tile_size = symbolic.pystr_to_symbolic(self.tile_sizes[-1])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[-1])
            else:
                tile_size = symbolic.pystr_to_symbolic(
                    self.tile_sizes[dim_idx])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[dim_idx])

            # handle offsets
            if self.tile_offset and dim_idx >= len(self.tile_offset):
                offset = self.tile_offset[-1]
            elif self.tile_offset:
                offset = self.tile_offset[dim_idx]
            else:
                offset = 0

            dim_idx -= removed_maps
            # If tile size is trivial, skip strip-mining map dimension
            if tile_size == map_entry.map.range.size()[dim_idx]:
                continue

            stripmine = StripMining(sdfg, sdfg_id, self.state_id,
                                    stripmine_subgraph, self.expr_index)

            # Special case: Tile size of 1 should be omitted from inner map
            if tile_size == 1 and tile_stride == 1 and not self.tile_trivial:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = ''
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = True
                stripmine.tile_offset = str(offset)
                stripmine.apply(graph, sdfg)
                removed_maps += 1
            else:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = self.prefix
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = self.divides_evenly
                stripmine.tile_offset = str(offset)
                stripmine.apply(graph, sdfg)

            # Apply the original map's schedule to the new map
            map_entry.schedule = original_schedule

            if last_map_entry:
                new_map_entry = graph.in_edges(map_entry)[0].src
                mapcollapse_subgraph = {
                    MapCollapse.outer_map_entry: graph.node_id(last_map_entry),
                    MapCollapse.inner_map_entry: graph.node_id(new_map_entry)
                }
                mapcollapse = MapCollapse(sdfg, sdfg_id, self.state_id,
                                          mapcollapse_subgraph, 0)
                mapcollapse.apply(graph, sdfg)
            last_map_entry = graph.in_edges(map_entry)[0].src
        return last_map_entry
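A sketch for this new-style API; tile_trivial forces strip-mining even where the tile size is 1, and apply returns the outermost new map entry:

# Hedged sketch: illustrative sizes; strides default to the tile sizes.
sdfg.apply_transformations(MapTiling,
                           options={'tile_sizes': (32, 32), 'tile_trivial': True})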
Example 11
class Array(Data):
    """
    Array data descriptor. This object represents a multi-dimensional data container in SDFGs that can be accessed and
    modified. The definition does not contain the actual array, but rather a description of how to construct it and
    how it should behave.

    The array definition is flexible in terms of data allocation: it allows arbitrary multidimensional, potentially
    symbolic shapes (e.g., an array with size ``N+1 x M`` will have ``shape=(N+1, M)``) and arbitrary data
    typeclasses (``dtype``). The physical data layout of the array is controlled by several properties:
       * The ``strides`` property determines the ordering and layout of the dimensions --- it specifies how many
         elements in memory are skipped whenever one element in that dimension is advanced. For example, the contiguous
         dimension always has a stride of ``1``; a C-style MxN array will have strides ``(N, 1)``, whereas a 
         FORTRAN-style array of the same size will have ``(1, M)``. Strides can be larger than the shape, which allows
         post-padding of the contents of each dimension.
       * The ``start_offset`` property is a number of elements to pad the beginning of the memory buffer with. This is
         used to ensure that a specific index is aligned as a form of pre-padding (that element may not necessarily be
         the first element, e.g., in the case of halo or "ghost cells" in stencils).
       * The ``total_size`` property determines how large the total allocation size is. Normally, it is the product of
         the ``shape`` elements, but if pre- or post-padding is involved it may be larger.
       * ``alignment`` provides alignment guarantees (in bytes) of the first element in the allocated array. This is
         used by allocators in the code generator to ensure certain addresses are expected to be aligned, e.g., for
         vectorization.
       * Lastly, a property called ``offset`` controls the logical access of the array, i.e., what would be the first
         element's index after padding and alignment. This mimics a language feature prominent in scientific languages
         such as FORTRAN, where one could set an array to begin with 1, or any arbitrary index. By default this is set
         to zero.

    To summarize with an example, a two-dimensional array with pre- and post-padding looks as follows:
    
    .. code-block:: text

        [xxx][          |xx]
             [          |xx]
             [          |xx]
             [          |xx]
             ---------------
             [xxxxxxxxxxxxx]

        shape = (4, 10)
        strides = (12, 1)
        start_offset = 3
        total_size = 63   [= 3 + 12 * 5]
        offset = (0, 0)


    Notice that the last padded row does not appear in strides, but is a consequence of ``total_size`` being larger.
    

    Apart from memory layout, other properties of ``Array`` help the data-centric transformation infrastructure make
    decisions about the array. ``allow_conflicts`` states that warnings should not be printed if potentially
    conflicting accesses (e.g., data races) occur. ``may_alias`` inhibits transformations that may assume that this array does not
    overlap with other arrays in the same context (e.g., function).
    """

    # Properties
    allow_conflicts = Property(dtype=bool,
                               default=False,
                               desc='If enabled, allows more than one '
                               'memlet to write to the same memory location without conflict '
                               'resolution.')

    strides = ShapeProperty(
        # element_type=symbolic.pystr_to_symbolic,
        desc='For each dimension, the number of elements to '
        'skip in order to obtain the next element in '
        'that dimension.')

    total_size = SymbolicProperty(default=0, desc='The total allocated size of the array. Can be used for padding.')

    offset = ShapeProperty(desc='Initial offset to translate all indices by.')

    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in the same function')

    alignment = Property(dtype=int, default=0, desc='Allocation alignment in bytes (0 uses compiler-default)')

    start_offset = Property(dtype=int, default=0, desc='Allocation offset elements for manual alignment (pre-padding)')

    def __init__(self,
                 dtype,
                 shape,
                 transient=False,
                 allow_conflicts=False,
                 storage=dtypes.StorageType.Default,
                 location=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 alignment=0,
                 debuginfo=None,
                 total_size=None,
                 start_offset=None):

        super(Array, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.may_alias = may_alias
        self.alignment = alignment
        if start_offset is not None:
            self.start_offset = start_offset

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = [_prod(shape[i + 1:]) for i in range(len(shape))]

        if strides is not None and shape is not None and total_size is None:
            # Compute the minimal total_size that could be used with strides and shape
            self.total_size = sum(((shp - 1) * s for shp, s in zip(shape, strides))) + 1
        else:
            self.total_size = total_size or _prod(shape)

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return '%s (dtype=%s, shape=%s)' % (type(self).__name__, self.dtype, self.shape)

    def clone(self):
        return type(self)(self.dtype, self.shape, self.transient, self.allow_conflicts, self.storage, self.location,
                          self.strides, self.offset, self.may_alias, self.lifetime, self.alignment, self.debuginfo,
                          self.total_size, self.start_offset)

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls(dtypes.int8, ())
        serialize.set_properties_from_json(ret, json_obj, context=context)

        # Default shape-related properties
        if not ret.offset:
            ret.offset = [0] * len(ret.shape)
        if not ret.strides:
            # Default strides are C-ordered
            ret.strides = [_prod(ret.shape[i + 1:]) for i in range(len(ret.shape))]
        if ret.total_size == 0:
            ret.total_size = _prod(ret.shape)

        # Check validity now
        ret.validate()
        return ret

    def validate(self):
        super(Array, self).validate()
        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol, symbolic.sympy.Basic)) for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer ' 'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sp.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sp.Symbol(str(s), **s.assumptions0)
                else:
                    s = sp.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sp.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sp.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sp.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Array):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # Any other case (constant vs. constant), check for equality
            if otherdim != dim:
                return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        arrname = name

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [d.name if isinstance(d, symbolic.symbol) else str(d) for d in self.shape]

    @property
    def free_symbols(self):
        result = super().free_symbols
        for s in self.strides:
            if isinstance(s, sp.Expr):
                result |= set(s.free_symbols)
        if isinstance(self.total_size, sp.Expr):
            result |= set(self.total_size.free_symbols)
        for o in self.offset:
            if isinstance(o, sp.Expr):
                result |= set(o.free_symbols)

        return result
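A minimal usage sketch of the descriptor above (not part of the original listing), reconstructing the padded 4x10 example from the docstring; it assumes the `dace` package is importable.

import dace

# Pre-padding of 3 elements, row stride 12 > shape 10 (post-padding),
# and one extra padded row absorbed into total_size, as in the figure.
desc = dace.data.Array(dace.float64, (4, 10),
                       strides=(12, 1),
                       start_offset=3,
                       total_size=63)
print(desc.strides, desc.total_size)  # strides (12, 1), total size 63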
Example 12
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = EnumProperty(dtype=dtypes.StorageType, desc="Storage location", default=dtypes.StorageType.Default)
    lifetime = EnumProperty(dtype=dtypes.AllocationLifetime,
                            desc='Data allocation span',
                            default=dtypes.AllocationLifetime.Scope)
    location = DictProperty(key_type=str, value_type=str, desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime, debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location if location is not None else {}
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol, symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values ' 'or symbols')
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @property
    def toplevel(self):
        return self.lifetime is not dtypes.AllocationLifetime.Scope

    def copy(self):
        raise RuntimeError('Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns a set of undefined symbols in this data descriptor. """
        result = set()
        for s in self.shape:
            if isinstance(s, sp.Basic):
                result |= set(s.free_symbols)
        return result

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        return self.dtype.veclen if hasattr(self.dtype, "veclen") else 1

    @property
    def ctype(self):
        return self.dtype.ctype

    def strides_from_layout(
        self,
        *dimensions: int,
        alignment: symbolic.SymbolicType = 1,
        only_first_aligned: bool = False,
    ) -> Tuple[Tuple[symbolic.SymbolicType], symbolic.SymbolicType]:
        """
        Returns the absolute strides and total size of this data descriptor,
        according to the given dimension ordering and alignment.
        :param dimensions: A sequence of integers representing a permutation
                           of the descriptor's dimensions.
        :param alignment: Padding (in elements) at the end, ensuring stride
                          is a multiple of this number. 1 (default) means no
                          padding.
        :param only_first_aligned: If True, only the first dimension is padded
                                   with ``alignment``. Otherwise all dimensions
                                   are.
        :return: A 2-tuple of (tuple of strides, total size).
        """
        # Verify dimensions
        if tuple(sorted(dimensions)) != tuple(range(len(self.shape))):
            raise ValueError('Every dimension must be given and appear once.')
        if (alignment < 1) == True:  # `== True` also covers symbolic comparisons
            raise ValueError('Invalid alignment value')

        strides = [1] * len(dimensions)
        total_size = 1
        first = True
        for dim in dimensions:
            strides[dim] = total_size
            if not only_first_aligned or first:
                dimsize = (((self.shape[dim] + alignment - 1) // alignment) * alignment)
            else:
                dimsize = self.shape[dim]
            total_size *= dimsize
            first = False

        return (tuple(strides), total_size)

    def set_strides_from_layout(self,
                                *dimensions: int,
                                alignment: symbolic.SymbolicType = 1,
                                only_first_aligned: bool = False):
        """
        Sets the absolute strides and total size of this data descriptor,
        according to the given dimension ordering and alignment.
        :param dimensions: A sequence of integers representing a permutation
                           of the descriptor's dimensions.
        :param alignment: Padding (in elements) at the end, ensuring stride
                          is a multiple of this number. 1 (default) means no
                          padding.
        :param only_first_aligned: If True, only the first dimension is padded
                                   with ``alignment``. Otherwise all dimensions
                                   are.
        """
        strides, totalsize = self.strides_from_layout(*dimensions,
                                                      alignment=alignment,
                                                      only_first_aligned=only_first_aligned)
        self.strides = strides
        self.total_size = totalsize
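A short sketch of `strides_from_layout` under the same assumptions (a concrete `Array` descriptor and an importable `dace` package): dimensions are listed from fastest- to slowest-varying, so `(0, 1)` yields a FORTRAN-style layout for a 4x10 array.

import dace

desc = dace.data.Array(dace.float64, (4, 10))
strides, total = desc.strides_from_layout(0, 1)  # dim 0 is contiguous
print(strides, total)  # (1, 4) 40 -- column-major, no alignment padding
desc.set_strides_from_layout(1, 0)  # back to C order: strides (10, 1)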
Example 13
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty()
    shape = ShapeProperty()
    transient = Property(dtype=bool)
    storage = Property(dtype=dace.types.StorageType,
                       desc="Storage location",
                       enum=dace.types.StorageType,
                       default=dace.types.StorageType.Default,
                       from_string=lambda x: types.StorageType[x])
    location = Property(
        dtype=str,  # Dict[str, symbolic]
        desc='Full storage location identifier (e.g., rank, GPU ID)',
        default='')
    toplevel = Property(dtype=bool,
                        desc="Allocate array outside of state",
                        default=False)
    debuginfo = DebugInfoProperty()

    def __init__(self, dtype, shape, transient, storage, location, toplevel,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location
        self.toplevel = toplevel
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def signature(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'
Example 14
class CompositeFusion(transformation.SubgraphTransformation):
    """ MultiExpansion + SubgraphFusion in one Transformation
        Additional StencilTiling is also possible as a canonicalizing
        transformation before fusion.
    """

    debug = Property(desc="Debug mode", dtype=bool, default=False)

    allow_expansion = Property(desc="Allow MultiExpansion first",
                               dtype=bool,
                               default=True)

    allow_tiling = Property(desc="Allow StencilTiling (after MultiExpansion)",
                            dtype=bool,
                            default=False)

    transient_allocation = EnumProperty(
        desc="Storage Location to push transients to that are "
        "fully contained within the subgraph.",
        dtype=dtypes.StorageType,
        default=dtypes.StorageType.Default)

    schedule_innermaps = Property(desc="Schedule of inner fused maps",
                                  dtype=dtypes.ScheduleType,
                                  default=None,
                                  allow_none=True)

    stencil_unroll_loops = Property(
        desc="Unroll inner stencil loops if they have size > 1",
        dtype=bool,
        default=False)
    stencil_strides = ShapeProperty(dtype=tuple,
                                    default=(1, ),
                                    desc="Stencil tile stride")

    expansion_split = Property(
        desc="Allow MultiExpansion to split up maps, if enabled",
        dtype=bool,
        default=True)

    def can_be_applied(self, sdfg: SDFG, subgraph: SubgraphView) -> bool:
        graph = subgraph.graph
        if self.allow_expansion:
            subgraph_fusion = SubgraphFusion(subgraph)
            if subgraph_fusion.can_be_applied(sdfg, subgraph):
                # try w/o copy first
                return True

            expansion = MultiExpansion(subgraph)
            expansion.permutation_only = not self.expansion_split
            if expansion.can_be_applied(sdfg, subgraph):
                # deepcopy
                graph_indices = [
                    i for (i, n) in enumerate(graph.nodes()) if n in subgraph
                ]
                sdfg_copy = SDFG.from_json(sdfg.to_json())
                graph_copy = sdfg_copy.nodes()[sdfg.nodes().index(graph)]
                subgraph_copy = SubgraphView(
                    graph_copy, [graph_copy.nodes()[i] for i in graph_indices])

                # rebuild the expansion on the deep-copied subgraph so that
                # the copy, not the original SDFG, is transformed
                expansion = MultiExpansion(subgraph_copy)
                expansion.permutation_only = not self.expansion_split
                expansion.apply(sdfg_copy)

                subgraph_fusion = SubgraphFusion(subgraph_copy)
                if subgraph_fusion.can_be_applied(sdfg_copy, subgraph_copy):
                    return True

                stencil_tiling = StencilTiling(subgraph_copy)
                if self.allow_tiling and stencil_tiling.can_be_applied(
                        sdfg_copy, subgraph_copy):
                    return True

        else:
            subgraph_fusion = SubgraphFusion(subgraph)
            if subgraph_fusion.can_be_applied(sdfg, subgraph):
                return True

        if self.allow_tiling:
            stencil_tiling = StencilTiling(subgraph)
            if stencil_tiling.can_be_applied(sdfg, subgraph):
                return True

        return False

    def apply(self, sdfg):
        subgraph = self.subgraph_view(sdfg)
        graph = subgraph.graph
        scope_dict = graph.scope_dict()
        map_entries = helpers.get_outermost_scope_maps(sdfg, graph, subgraph,
                                                       scope_dict)
        first_entry = next(iter(map_entries))

        if self.allow_expansion:
            expansion = MultiExpansion(subgraph, self.sdfg_id, self.state_id)
            expansion.permutation_only = not self.expansion_split
            if expansion.can_be_applied(sdfg, subgraph):
                expansion.apply(sdfg)

        sf = SubgraphFusion(subgraph, self.sdfg_id, self.state_id)
        if sf.can_be_applied(sdfg, self.subgraph_view(sdfg)):
            # set SubgraphFusion properties
            sf.debug = self.debug
            sf.transient_allocation = self.transient_allocation
            sf.schedule_innermaps = self.schedule_innermaps
            sf.apply(sdfg)
            self._global_map_entry = sf._global_map_entry
            return

        elif self.allow_tiling:
            st = StencilTiling(subgraph, self.sdfg_id, self.state_id)
            if st.can_be_applied(sdfg, self.subgraph_view(sdfg)):
                # set StencilTiling properties
                st.debug = self.debug
                st.unroll_loops = self.stencil_unroll_loops
                st.strides = self.stencil_strides
                st.apply(sdfg)
                # StencilTiling: update nodes
                new_entries = st._outer_entries
                subgraph = helpers.subgraph_from_maps(sdfg, graph, new_entries)
                sf = SubgraphFusion(subgraph, self.sdfg_id, self.state_id)
                # set SubgraphFusion properties
                sf.debug = self.debug
                sf.transient_allocation = self.transient_allocation
                sf.schedule_innermaps = self.schedule_innermaps

                sf.apply(sdfg)
                self._global_map_entry = sf._global_map_entry
                return

        warnings.warn("CompositeFusion::Apply did not perform as expected")
Example 15
class StencilTiling(transformation.SubgraphTransformation):
    """ Operates on top level maps of the given subgraph.
        Applies orthogonal tiling to each of the maps with
        the given strides and extends the newly created
        inner tiles to account for data dependencies
        due to stencil patterns. For each map all outgoing
        memlets to an array must cover the memlets that
        are incoming into a following child map.

        All maps must have the same map parameters in
        the same order.
    """

    # Properties
    debug = Property(desc="Debug mode", dtype=bool, default=False)

    prefix = Property(dtype=str,
                      default="stencil",
                      desc="Prefix for new inner tiled range symbols")

    strides = ShapeProperty(dtype=tuple, default=(1, ), desc="Tile stride")

    schedule = Property(dtype=dace.dtypes.ScheduleType,
                        default=dace.dtypes.ScheduleType.Default,
                        desc="ScheduleType of the inner maps")

    unroll_loops = Property(desc="Unroll inner loops if they have size > 1",
                            dtype=bool,
                            default=False)

    @staticmethod
    def coverage_dicts(sdfg, graph, map_entry, outer_range=True):
        '''
        Returns a tuple of two dicts:
        the first dict maps each data name entering the map
        to its associated access range;
        the second dict maps each data name exiting the map
        to its associated access range.
        If outer_range is True, the outer map ranges are substituted
        into the min/max of the inner access ranges.
        '''
        map_exit = graph.exit_node(map_entry)
        map = map_entry.map

        entry_coverage = {}
        exit_coverage = {}
        # create dicts with which we can replace all iteration
        # variable_mapping by their range
        map_min = {
            dace.symbol(param): e
            for param, e in zip(map.params, map.range.min_element())
        }
        map_max = {
            dace.symbol(param): e
            for param, e in zip(map.params, map.range.max_element())
        }

        # look at inner memlets at map entry
        for e in graph.out_edges(map_entry):
            if outer_range:
                # get subset
                min_element = [
                    m.subs(map_min) for m in e.data.subset.min_element()
                ]
                max_element = [
                    m.subs(map_max) for m in e.data.subset.max_element()
                ]
                # create range
                rng = subsets.Range(
                    (min_e, max_e, 1)
                    for min_e, max_e in zip(min_element, max_element))
            else:
                rng = dcpy(e.data.subset)

            if e.data.data not in entry_coverage:
                entry_coverage[e.data.data] = rng
            else:
                old_coverage = entry_coverage[e.data.data]
                entry_coverage[e.data.data] = subsets.union(old_coverage, rng)

        # look at inner memlets at map exit
        for e in graph.in_edges(map_exit):
            if outer_range:
                # get subset
                min_element = [
                    m.subs(map_min) for m in e.data.subset.min_element()
                ]
                max_element = [
                    m.subs(map_max) for m in e.data.subset.max_element()
                ]
                # create range
                rng = subsets.Range(
                    (min_e, max_e, 1)
                    for min_e, max_e in zip(min_element, max_element))
            else:
                rng = dcpy(e.data.subset)

            if e.data.data not in exit_coverage:
                exit_coverage[e.data.data] = rng
            else:
                old_coverage = exit_coverage[e.data.data]
                exit_coverage[e.data.data] = subsets.union(old_coverage, rng)

        # return both coverages as a tuple
        return (entry_coverage, exit_coverage)

    @staticmethod
    def topology(sdfg, graph, map_entries):
        # first get dicts of parents and children for each map_entry
        # get source maps as a starting point for BFS
        # these are all map entries reachable from source nodes
        sink_maps = set()
        children_dict = defaultdict(set)
        parent_dict = defaultdict(set)
        map_exits = {graph.exit_node(entry): entry for entry in map_entries}

        for map_entry in map_entries:
            map_exit = graph.exit_node(map_entry)
            for e in graph.in_edges(map_entry):
                if isinstance(e.src, nodes.AccessNode):
                    for ie in graph.in_edges(e.src):
                        if ie.src in map_exits:
                            other_entry = map_exits[ie.src]
                            children_dict[other_entry].add(map_entry)
                            parent_dict[map_entry].add(other_entry)
            out_counter = 0
            for e in graph.out_edges(map_exit):
                if isinstance(e.dst, nodes.AccessNode):
                    for oe in graph.out_edges(e.dst):
                        if oe.dst in map_entries:
                            other_entry = oe.dst
                            children_dict[map_entry].add(other_entry)
                            parent_dict[other_entry].add(map_entry)
                            out_counter += 1
            if out_counter == 0:
                sink_maps.add(map_entry)

        return (children_dict, parent_dict, sink_maps)

    @staticmethod
    def can_be_applied(sdfg, subgraph) -> bool:
        # get highest scope maps
        graph = subgraph.graph
        map_entries = set(
            helpers.get_outermost_scope_maps(sdfg, graph, subgraph))
        # 1.1: There has to be more than one outermost scope map entry
        if len(map_entries) <= 1:
            return False

        # 1.2: check basic constraints:
        # - all parameters have to be the same (this implies same length)
        # - no parameter permutations here as ambiguity is very high then
        # - same strides everywhere
        first_map = next(iter(map_entries))
        params = dcpy(first_map.map.params)
        strides = first_map.map.range.strides()
        schedule = first_map.map.schedule

        for map_entry in map_entries:
            if map_entry.map.params != params:
                return False
            if map_entry.map.range.strides() != strides:
                return False
            if map_entry.map.schedule != schedule:
                return False

        # 1.3: check whether all map entries only differ by a const amount
        first_entry = next(iter(map_entries))
        for map_entry in map_entries:
            for r1, r2 in zip(map_entry.map.range, first_entry.map.range):
                if len((r1[0] - r2[0]).free_symbols) > 0:
                    return False
                if len((r1[1] - r2[1]).free_symbols) > 0:
                    return False

        # get intermediate_nodes, out_nodes from SubgraphFusion Transformation
        node_config = SubgraphFusion.get_adjacent_nodes(
            sdfg, graph, map_entries)
        (_, intermediate_nodes, out_nodes) = node_config

        # 1.4: check topological feasibility
        if not SubgraphFusion.check_topo_feasibility(
                sdfg, graph, map_entries, intermediate_nodes, out_nodes):
            return False
        # 1.5 nodes that are both intermediate and out nodes
        # are not supported in StencilTiling
        if len(intermediate_nodes & out_nodes) > 0:
            return False
        # get coverages for every map entry
        coverages = {}
        memlets = {}
        for map_entry in map_entries:
            coverages[map_entry] = StencilTiling.coverage_dicts(
                sdfg, graph, map_entry)
            memlets[map_entry] = StencilTiling.coverage_dicts(
                sdfg, graph, map_entry, outer_range=False)

        # get DAG neighbours for each map
        dag_neighbors = StencilTiling.topology(sdfg, graph, map_entries)
        (children_dict, _, sink_maps) = dag_neighbors

        # 1.6: we now check coverage:
        # each outgoing coverage for a data memlet has to
        # be exactly equal to the union of incoming coverages
        #    of all children map memlets of this data

        # important:
        # 1. it has to be equal and not only cover it in order to
        #    account for ranges too long
        # 2. we check coverages by map parameter and not by
        #    array, this way it is even more general
        # 3. map parameter coverages are checked for each
        #    (map_entry, children of this map_entry) - pair
        for map_entry in map_entries:
            # get coverage from current map_entry
            map_coverage = coverages[map_entry][1]

            # final mapping map_parameter -> coverage will be stored here
            param_parent_coverage = {p: None for p in map_entry.params}
            param_children_coverage = {p: None for p in map_entry.params}
            for child_entry in children_dict[map_entry]:
                # get mapping data_name -> coverage
                for (data_name, cov) in map_coverage.items():
                    parent_coverage = cov
                    children_coverage = None
                    if data_name in coverages[child_entry][0]:
                        children_coverage = subsets.union(
                            children_coverage,
                            coverages[child_entry][0][data_name])

                    # extend mapping map_parameter -> coverage
                    # by the previous mapping

                    for i, (p_subset, c_subset) in enumerate(
                            zip(parent_coverage, children_coverage)):

                        # transform into subset
                        p_subset = subsets.Range((p_subset, ))
                        c_subset = subsets.Range((c_subset, ))

                        # get associated parameter in memlet
                        params1 = symbolic.symlist(
                            memlets[map_entry][1][data_name][i]).keys()
                        params2 = symbolic.symlist(
                            memlets[child_entry][0][data_name][i]).keys()
                        if params1 != params2:
                            return False
                        params = params1
                        if len(params) > 1:
                            # this is not supported
                            return False
                        try:
                            symbol = next(iter(params))
                            param_parent_coverage[symbol] = subsets.union(
                                param_parent_coverage[symbol], p_subset)
                            param_children_coverage[symbol] = subsets.union(
                                param_children_coverage[symbol], c_subset)

                        except StopIteration:
                            # current dim has no symbol associated.
                            # ignore and continue
                            warnings.warn(
                                f"In map {map_entry}, there is a "
                                f"dimension belonging to {data_name} "
                                "that has no map parameter associated.")

            # 1.6: parameter mapping must be the same
            if param_parent_coverage != param_children_coverage:
                return False

        # 1.7: we want all sink maps to have the same range size
        assert len(sink_maps) > 0
        first_sink_map = next(iter(sink_maps))
        if not all([
                map.range.size() == first_sink_map.range.size()
                for map in sink_maps
        ]):
            return False

        return True

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        subgraph = self.subgraph_view(sdfg)
        map_entries = helpers.get_outermost_scope_maps(sdfg, graph, subgraph)

        result = StencilTiling.topology(sdfg, graph, map_entries)
        (children_dict, parent_dict, sink_maps) = result

        # next up, calculate inferred ranges for each map
        # for each map entry, this contains a tuple of dicts:
        # each of those maps from data_name of the array to
        # inferred outer ranges. An inferred outer range is created
        # by taking the union of ranges of inner subsets corresponding
        # to that data and substituting this subset by the min / max of the
        # parametrized map boundaries
        # finally, from these outer ranges we can easily calculate
        # strides and tile sizes required for every map
        inferred_ranges = defaultdict(dict)

        # create array of reverse topologically sorted map entries
        # to iterate over
        topo_reversed = []
        queue = set(sink_maps.copy())
        while len(queue) > 0:
            element = next(e for e in queue
                           if not children_dict[e] - set(topo_reversed))
            topo_reversed.append(element)
            queue.remove(element)
            for parent in parent_dict[element]:
                queue.add(parent)

        # main loop
        # first get coverage dicts for each map entry
        # for each map, contains a tuple of two dicts
        # each of those two maps from data name to outer range
        coverage = {}
        for map_entry in map_entries:
            coverage[map_entry] = StencilTiling.coverage_dicts(
                sdfg, graph, map_entry, outer_range=True)

        # we have a mapping from data name to outer range
        # however we want a mapping from map parameters to outer ranges
        # for this we need to find out how all array dimensions map to
        # outer ranges

        variable_mapping = defaultdict(list)
        for map_entry in topo_reversed:
            map = map_entry.map

            # first find out variable mapping
            for e in itertools.chain(
                    graph.out_edges(map_entry),
                    graph.in_edges(graph.exit_node(map_entry))):
                mapping = []
                for dim in e.data.subset:
                    syms = set()
                    for d in dim:
                        syms |= symbolic.symlist(d).keys()
                    if len(syms) > 1:
                        raise NotImplementedError(
                            "One incoming or outgoing stencil subset is indexed "
                            "by multiple map parameters. "
                            "This is not supported yet.")
                    try:
                        mapping.append(syms.pop())
                    except KeyError:
                        # just append None if there is no map symbol in it.
                        # we don't care for now.
                        mapping.append(None)

                if e.data.data in variable_mapping:
                    # assert that this is the same everywhere.
                    # else we might run into problems
                    assert variable_mapping[e.data.data] == mapping
                else:
                    variable_mapping[e.data.data] = mapping

            # now do mapping data name -> outer range
            # and from that infer mapping variable -> outer range
            local_ranges = {dn: None for dn in coverage[map_entry][1].keys()}
            for data_name, cov in coverage[map_entry][1].items():
                local_ranges[data_name] = subsets.union(
                    local_ranges[data_name], cov)
                # now look at proceeding maps
                # and union those subsets -> could be larger with stencil indent
                for child_map in children_dict[map_entry]:
                    if data_name in coverage[child_map][0]:
                        local_ranges[data_name] = subsets.union(
                            local_ranges[data_name],
                            coverage[child_map][0][data_name])

            # final assignment: combine local_ranges and variable_mapping
            # together into inferred_ranges
            inferred_ranges[map_entry] = {p: None for p in map.params}
            for data_name, ranges in local_ranges.items():
                for param, r in zip(variable_mapping[data_name], ranges):
                    # create new range from this subset and assign
                    rng = subsets.Range((r, ))
                    if param:
                        inferred_ranges[map_entry][param] = subsets.union(
                            inferred_ranges[map_entry][param], rng)

        # get parameters -- should all be the same
        params = next(iter(map_entries)).map.params.copy()
        # define reference range as inferred range of one of the sink maps
        self.reference_range = inferred_ranges[next(iter(sink_maps))]
        if self.debug:
            print("StencilTiling::Reference Range", self.reference_range)
        # next up, search for the ranges that don't change
        invariant_dims = []
        for idx, p in enumerate(params):
            different = False
            if self.reference_range[p] is None:
                invariant_dims.append(idx)
                warnings.warn(
                    f"StencilTiling::No Stencil pattern detected for parameter {p}"
                )
                continue
            for m in map_entries:
                if inferred_ranges[m][p] != self.reference_range[p]:
                    different = True
                    break
            if not different:
                invariant_dims.append(idx)
                warnings.warn(
                    f"StencilTiling::No Stencil pattern detected for parameter {p}"
                )

        # during stripmining, we will create new outer map entries
        # for easy access
        self._outer_entries = set()
        # with inferred_ranges constructed, we can begin to strip mine
        for map_entry in map_entries:
            # Retrieve map entry and exit nodes.
            map = map_entry.map

            stripmine_subgraph = {
                StripMining._map_entry: graph.nodes().index(map_entry)
            }

            sdfg_id = sdfg.sdfg_id
            last_map_entry = None
            original_schedule = map_entry.schedule
            self.tile_sizes = []
            self.tile_offset_lower = []
            self.tile_offset_upper = []

            # strip mining each dimension where necessary
            removed_maps = 0
            for dim_idx, param in enumerate(map_entry.map.params):
                # get current_node tile size
                if dim_idx >= len(self.strides):
                    tile_stride = symbolic.pystr_to_symbolic(self.strides[-1])
                else:
                    tile_stride = symbolic.pystr_to_symbolic(
                        self.strides[dim_idx])

                trivial = False

                if dim_idx in invariant_dims:
                    self.tile_sizes.append(tile_stride)
                    self.tile_offset_lower.append(0)
                    self.tile_offset_upper.append(0)
                else:
                    target_range_current = inferred_ranges[map_entry][param]
                    reference_range_current = self.reference_range[param]

                    min_diff = symbolic.SymExpr(reference_range_current.min_element()[0] \
                                    - target_range_current.min_element()[0])
                    max_diff = symbolic.SymExpr(target_range_current.max_element()[0] \
                                    - reference_range_current.max_element()[0])

                    try:
                        min_diff = symbolic.evaluate(min_diff, {})
                        max_diff = symbolic.evaluate(max_diff, {})
                    except TypeError:
                        raise RuntimeError("Symbolic evaluation of map "
                                           "ranges failed. Please check "
                                           "your parameters and match.")

                    self.tile_sizes.append(tile_stride + max_diff + min_diff)
                    self.tile_offset_lower.append(
                        symbolic.pystr_to_symbolic(str(min_diff)))
                    self.tile_offset_upper.append(
                        symbolic.pystr_to_symbolic(str(max_diff)))

                # get calculated parameters
                tile_size = self.tile_sizes[-1]

                dim_idx -= removed_maps
                # If map or tile sizes are trivial, skip strip-mining map dimension
                # special case: if the tile covers the full range of an
                # invariant dimension, skip strip-mining it
                if tile_size == map.range.size()[dim_idx] and (
                        dim_idx + removed_maps) in invariant_dims:
                    continue

                # trivial map: we just continue
                if map.range.size()[dim_idx] in [0, 1]:
                    continue

                if tile_size == 1 and tile_stride == 1 and (
                        dim_idx + removed_maps) in invariant_dims:
                    trivial = True
                    removed_maps += 1

                # indent all map ranges accordingly and then perform
                # strip mining on these. Offset inner maps accordingly afterwards

                range_tuple = (map.range[dim_idx][0] +
                               self.tile_offset_lower[-1],
                               map.range[dim_idx][1] -
                               self.tile_offset_upper[-1],
                               map.range[dim_idx][2])
                map.range[dim_idx] = range_tuple
                stripmine = StripMining(sdfg_id, self.state_id,
                                        stripmine_subgraph, 0)

                stripmine.tiling_type = 'ceilrange'
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = self.prefix if not trivial else ''
                # use tile_stride for both -- we will extend
                # the inner tiles later
                stripmine.tile_size = str(tile_stride)
                stripmine.tile_stride = str(tile_stride)
                outer_map = stripmine.apply(sdfg)
                outer_map.schedule = original_schedule

                # set the inner map to the schedule given by the property
                map_entry.schedule = self.schedule

                # if tile stride is 1, we can make a nice simplification by just
                # taking the overapproximated inner range as inner range
                # this eliminates the min/max in the range which
                # enables loop unrolling
                if tile_stride == 1:
                    map_entry.range[dim_idx] = tuple(
                        symbolic.SymExpr(el._approx_expr) if isinstance(
                            el, symbolic.SymExpr) else el
                        for el in map_entry.range[dim_idx])

                # in map_entry: enlarge tiles by upper and lower offset
                # doing it this way and not via stripmine strides ensures
                # that the max gets changed as well
                old_range = map_entry.range[dim_idx]
                map_entry.range[dim_idx] = ((old_range[0] -
                                             self.tile_offset_lower[-1]),
                                            (old_range[1] +
                                             self.tile_offset_upper[-1]),
                                            old_range[2])

                # We have to propagate here for correct outer volume and subset sizes
                _propagate_node(graph, map_entry)
                _propagate_node(graph, graph.exit_node(map_entry))

                # usual tiling pipeline
                if last_map_entry:
                    new_map_entry = graph.in_edges(map_entry)[0].src
                    mapcollapse_subgraph = {
                        MapCollapse._outer_map_entry:
                        graph.node_id(last_map_entry),
                        MapCollapse._inner_map_entry:
                        graph.node_id(new_map_entry)
                    }
                    mapcollapse = MapCollapse(sdfg_id, self.state_id,
                                              mapcollapse_subgraph, 0)
                    mapcollapse.apply(sdfg)
                last_map_entry = graph.in_edges(map_entry)[0].src
            # add last instance of map entries to _outer_entries
            if last_map_entry:
                self._outer_entries.add(last_map_entry)

            # Map Unroll Feature: only unroll if conditions are met:
            # Only unroll if at least one of the inner map ranges is strictly larger than 1
            # Only unroll if strides all are one
            if self.unroll_loops and all(s == 1 for s in self.strides) and any(
                    s not in [0, 1] for s in map_entry.range.size()):
                l = len(map_entry.params)
                if l > 1:
                    subgraph = {
                        MapExpansion.map_entry: graph.nodes().index(map_entry)
                    }
                    trafo_expansion = MapExpansion(sdfg.sdfg_id,
                                                   sdfg.nodes().index(graph),
                                                   subgraph, 0)
                    trafo_expansion.apply(sdfg)
                maps = [map_entry]
                for _ in range(l - 1):
                    map_entry = graph.out_edges(map_entry)[0].dst
                    maps.append(map_entry)

                for map in reversed(maps):
                    # MapToForLoop
                    subgraph = {
                        MapToForLoop._map_entry: graph.nodes().index(map)
                    }
                    trafo_for_loop = MapToForLoop(sdfg.sdfg_id,
                                                  sdfg.nodes().index(graph),
                                                  subgraph, 0)
                    trafo_for_loop.apply(sdfg)
                    nsdfg = trafo_for_loop.nsdfg

                    # LoopUnroll

                    guard = trafo_for_loop.guard
                    end = trafo_for_loop.after_state
                    begin = next(e.dst for e in nsdfg.out_edges(guard)
                                 if e.dst != end)

                    subgraph = {
                        DetectLoop._loop_guard: nsdfg.nodes().index(guard),
                        DetectLoop._loop_begin: nsdfg.nodes().index(begin),
                        DetectLoop._exit_state: nsdfg.nodes().index(end)
                    }
                    transformation = LoopUnroll(0, 0, subgraph, 0)
                    transformation.apply(nsdfg)
            elif self.unroll_loops:
                warnings.warn(
                    "Did not unroll loops. Either all ranges are equal to "
                    "one or range difference is symbolic.")

        self._outer_entries = list(self._outer_entries)
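As with CompositeFusion above, a hedged driver sketch: it assumes an SDFG `sdfg` whose first state holds a chain of stencil maps with identical parameters, as required by can_be_applied.

from dace.sdfg.graph import SubgraphView

state = sdfg.nodes()[0]
subgraph = SubgraphView(state, state.nodes())
tiling = StencilTiling(subgraph)
tiling.strides = (8, 8)      # tile stride per map dimension
tiling.unroll_loops = True   # unroll inner tiles of size > 1
if StencilTiling.can_be_applied(sdfg, subgraph):
    tiling.apply(sdfg)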
Example 16
class BufferTiling(transformation.Transformation):
    """ Implements the buffer tiling transformation.

        BufferTiling tiles a buffer that is in between two maps, where the preceding map
        writes to the buffer and the succeeding map reads from it.
        It introduces additional computations in exchange for reduced memory footprint.
        Commonly used to make use of shared memory on GPUs.
    """

    _map1_exit = nodes.MapExit(nodes.Map('', [], []))
    _array = nodes.AccessNode('')
    _map2_entry = nodes.MapEntry(nodes.Map('', [], []))

    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")

    # Returns a list of graphs that represent the pattern
    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(
                BufferTiling._map1_exit,
                BufferTiling._array,
                BufferTiling._map2_entry,
            )
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        map1_exit = graph.nodes()[candidate[BufferTiling._map1_exit]]
        map2_entry = graph.nodes()[candidate[BufferTiling._map2_entry]]

        for buf in graph.all_nodes_between(map1_exit, map2_entry):
            # Check that buffers are AccessNodes.
            if not isinstance(buf, nodes.AccessNode):
                return False

            # Check that buffers are transient.
            if not sdfg.arrays[buf.data].transient:
                return False

            # Check that buffers have exactly 1 input and 1 output edge.
            if graph.in_degree(buf) != 1:
                return False
            if graph.out_degree(buf) != 1:
                return False

            # Check that buffers are next to the maps.
            if graph.in_edges(buf)[0].src != map1_exit:
                return False
            if graph.out_edges(buf)[0].dst != map2_entry:
                return False

            # Check that the data consumed is provided.
            provided = graph.in_edges(buf)[0].data.subset
            consumed = graph.out_edges(buf)[0].data.subset
            if not provided.covers(consumed):
                return False

            # Check that buffers occur only once in this state.
            num_occurrences = len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == buf
            ])
            if num_occurrences > 1:
                return False
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map1_exit = graph.nodes()[candidate[BufferTiling._map1_exit]]
        map2_entry = graph.nodes()[candidate[BufferTiling._map2_entry]]

        return " -> ".join(entry.map.label + ": " + str(entry.map.params)
                           for entry in [map1_exit, map2_entry])

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        map1_exit = graph.nodes()[self.subgraph[self._map1_exit]]
        map1_entry = graph.entry_node(map1_exit)
        map2_entry = graph.nodes()[self.subgraph[self._map2_entry]]
        buffers = graph.all_nodes_between(map1_exit, map2_entry)
        # Situation:
        # -> map1_entry -> ... -> map1_exit -> buffers -> map2_entry -> ...

        lower_extents = tuple(b - a for a, b in zip(
            map1_entry.range.min_element(), map2_entry.range.min_element()))
        upper_extents = tuple(a - b for a, b in zip(
            map1_entry.range.max_element(), map2_entry.range.max_element()))

        # Tile the first map with overlap
        MapTilingWithOverlap.apply_to(sdfg,
                                      map_entry=map1_entry,
                                      options={
                                          'tile_sizes': self.tile_sizes,
                                          'lower_overlap': lower_extents,
                                          'upper_overlap': upper_extents
                                      })
        tile_map1_exit = graph.out_edges(map1_exit)[0].dst
        tile_map1_entry = graph.entry_node(tile_map1_exit)
        tile_map1_entry.label = 'BufferTiling'

        # Tile the second map
        MapTiling.apply_to(sdfg,
                           map_entry=map2_entry,
                           options={
                               'tile_sizes': self.tile_sizes,
                               'tile_trivial': True
                           })
        tile_map2_entry = graph.in_edges(map2_entry)[0].src

        # Fuse maps
        some_buffer = next(
            iter(buffers))  # some dummy to pass to MapFusion.apply_to()
        MapFusion.apply_to(sdfg,
                           first_map_exit=tile_map1_exit,
                           array=some_buffer,
                           second_map_entry=tile_map2_entry)

        # Optimize the simple cases
        map1_entry.range.ranges = [
            (r[0], r[0], r[2]) if l_ext == 0 and u_ext == 0 and ts == 1 else r
            for r, l_ext, u_ext, ts in zip(map1_entry.range.ranges,
                                           lower_extents, upper_extents,
                                           self.tile_sizes)
        ]

        map2_entry.range.ranges = [
            (r[0], r[0], r[2]) if ts == 1 else r
            for r, ts in zip(map2_entry.range.ranges, self.tile_sizes)
        ]

        if any(ts == 1 for ts in self.tile_sizes):
            if any(r[0] == r[1] for r in map1_entry.map.range):
                TrivialMapElimination.apply_to(sdfg, _map_entry=map1_entry)
            if any(r[0] == r[1] for r in map2_entry.map.range):
                TrivialMapElimination.apply_to(sdfg, _map_entry=map2_entry)
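A hedged application sketch using the generic transformation driver; it assumes `sdfg` contains a producer map, a transient buffer and a consumer map matching the pattern above.

# apply_transformations pattern-matches (map exit -> buffer -> map entry)
# anywhere in the SDFG and applies BufferTiling with the given tile sizes;
# it returns the number of applications performed.
applied = sdfg.apply_transformations(BufferTiling,
                                     options={'tile_sizes': (32, 32, 32)})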
Example 17
class ProcessGrid(object):
    """
    Process-grids implement cartesian topologies similarly to cartesian communicators created with [MPI_Cart_create](https://www.mpich.org/static/docs/latest/www3/MPI_Cart_create.html)
    and [MPI_Cart_sub](https://www.mpich.org/static/docs/v3.2/www3/MPI_Cart_sub.html).

    The boolean property `is_subgrid` provides a switch between "parent" process-grids (equivalent to communicators
    created with `MPI_Cart_create`) and sub-grids (equivalent to communicators created with `MPI_Cart_sub`).
    
    If `is_subgrid` is false, a "parent" process-grid is created. The `shape` property is equivalent to the `dims`
    parameter of `MPI_Cart_create`. The other properties are ignored. All "parent" process-grids spawn out of
    `MPI_COMM_WORLD`, while their `periods` and `reorder` parameters are set to False.

    If `is_subgrid` is true, then the `parent_grid` is partitioned to lower-dimensional cartesian sub-grids (for more
    details, see the documentation of `MPI_Cart_sub`). The `parent_grid` property is equivalent to the `comm` parameter
    of `MPI_Cart_sub`. The `color` property corresponds to the `remain_dims` parameter of `MPI_Cart_sub`, i.e., the i-th
    entry specifies whether the i-th dimension is kept in the sub-grid or is dropped.
    
    The following properties store information used in the redistribution of data:

    The `exact_grid` property is either None or the rank of an MPI process in the `parent_grid`. If set then, out of all
    the sub-grids created, only the one that contains this rank is used for collective communication. The `root`
    property is used to select the root rank for purposes of collective communication (by default 0).
    """

    name = Property(dtype=str, desc="The process-grid's name.")
    is_subgrid = Property(
        dtype=bool,
        default=False,
        desc="If true, spanws sub-grids out of the parent process-grid.")
    shape = ShapeProperty(default=[], desc="The process-grid's shape.")
    parent_grid = Property(
        dtype=str,
        allow_none=True,
        default=None,
        desc="Name of the parent process-grid "
        "(mandatory if `is_subgrid` is true, otherwise ignored).")
    color = ListProperty(
        int,
        allow_none=True,
        default=None,
        desc=
        "The i-th entry specifies whether the i-th dimension is kept in the sub-grid or is "
        "dropped (mandatory if `is_subgrid` is true, otherwise ignored).")
    exact_grid = SymbolicProperty(
        allow_none=True,
        default=None,
        desc=
        "If set then, out of all the sub-grids created, only the one that contains the "
        "rank with id `exact_grid` will be utilized for collective communication "
        "(optional if `is_subgrid` is true, otherwise ignored).")
    root = SymbolicProperty(default=0,
                            desc="The root rank for collective communication.")

    def __init__(self,
                 name: str,
                 is_subgrid: bool,
                 shape: ShapeType = None,
                 parent_grid: 'ProcessGrid' = None,
                 color: Sequence[Union[Integral, bool]] = None,
                 exact_grid: RankType = None,
                 root: RankType = 0):
        self.name = name
        self.is_subgrid = is_subgrid
        if is_subgrid:
            self.parent_grid = parent_grid.name
            self.color = color
            self.exact_grid = exact_grid
            self.shape = [
                parent_grid.shape[i] for i, remain in enumerate(color)
                if remain
            ]
        else:
            self.shape = shape
        self.root = root
        self._validate()
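
    # Hypothetical construction sketch (not in the original source; names are
    # invented for illustration):
    #
    #   parent = ProcessGrid('grid', is_subgrid=False, shape=[2, 4])
    #   rows = ProcessGrid('rows', is_subgrid=True, parent_grid=parent,
    #                      color=[0, 1])
    #   # rows.shape == [4]: one entry per dimension kept by `color`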

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if self.is_subgrid:
            if not self.parent_grid:
                raise ValueError(
                    'Sub-grid is missing its corresponding parent process-grid')
        if any(not isinstance(s, (Integral, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError(
                'Shape must be a list or tuple of integer values or symbols')
        if self.color and any(c < 0 or c > 1 for c in self.color):
            raise ValueError(
                'Color must have only logical true (1) or false (0) values.')
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)
        retdict = {"type": type(self).__name__, "attributes": attrs}
        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls('tmp', False, [])
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # Check validity now
        ret.validate()
        return ret

    def init_code(self):
        """ Outputs MPI allocation/initialization code for the process-grid.
            It is assumed that the following variables exist in the SDFG program's state:
            - MPI_Comm {self.name}_comm
            - MPI_Group {self.name}_group
            - int {self.name}_rank
            - int {self.name}_size
            - int* {self.name}_dims
            - int* {self.name}_remain
            - int* {self.name}_coords
            - bool {self.name}_valid

            These variables are typically added to the program's state through a Tasklet, e.g., the Dummy MPI node (for
            more details, check the DaCe MPI library in `dace/libraries/mpi`).

        """
        if self.is_subgrid:
            tmp = ""
            for i, s in enumerate(self.shape):
                tmp += f"__state->{self.name}_dims[{i}] = {s};\n"
            tmp += f"""
                __state->{self.name}_valid = false;
                if (__state->{self.parent_grid}_valid) {{
                    int {self.name}_remain[{len(self.color)}] = {{{', '.join(['1' if c else '0' for c in self.color])}}};
                    MPI_Cart_sub(__state->{self.parent_grid}_comm, {self.name}_remain, &__state->{self.name}_comm);
                    MPI_Comm_group(__state->{self.name}_comm, &__state->{self.name}_group);
                    MPI_Comm_rank(__state->{self.name}_comm, &__state->{self.name}_rank);
                    MPI_Comm_size(__state->{self.name}_comm, &__state->{self.name}_size);
                    MPI_Cart_coords(__state->{self.name}_comm, __state->{self.name}_rank, {len(self.shape)}, __state->{self.name}_coords);
            """
            if self.exact_grid is not None:
                tmp += f"""
                    int ranks1[1] = {{{self.exact_grid}}};
                    int ranks2[1];
                    MPI_Group_translate_ranks(__state->{self.parent_grid}_group, 1, ranks1, __state->{self.name}_group, ranks2);
                    __state->{self.name}_valid = (ranks2[0] != MPI_PROC_NULL && ranks2[0] != MPI_UNDEFINED);
                }}
                """
            else:
                tmp += f"""
                    __state->{self.name}_valid = true;
                }}
                """
            return tmp
        else:
            tmp = ""
            for i, s in enumerate(self.shape):
                tmp += f"__state->{self.name}_dims[{i}] = {s};\n"
            tmp += f"""
                int {self.name}_periods[{len(self.shape)}] = {{0}};
                MPI_Cart_create(MPI_COMM_WORLD, {len(self.shape)}, __state->{self.name}_dims, {self.name}_periods, 0, &__state->{self.name}_comm);
                if (__state->{self.name}_comm != MPI_COMM_NULL) {{
                    MPI_Comm_group(__state->{self.name}_comm, &__state->{self.name}_group);
                    MPI_Comm_rank(__state->{self.name}_comm, &__state->{self.name}_rank);
                    MPI_Comm_size(__state->{self.name}_comm, &__state->{self.name}_size);
                    MPI_Cart_coords(__state->{self.name}_comm, __state->{self.name}_rank, {len(self.shape)}, __state->{self.name}_coords);
                    __state->{self.name}_valid = true;
                }} else {{
                    __state->{self.name}_group = MPI_GROUP_NULL;
                    __state->{self.name}_rank = MPI_PROC_NULL;
                    __state->{self.name}_size = 0;
                    __state->{self.name}_valid = false;
                }}
            """
            return tmp
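
    # For a non-subgrid ProcessGrid named 'grid' with shape [2, 2], the branch
    # above would emit roughly the following (reconstructed from the f-strings;
    # the __state struct fields are assumed to be declared elsewhere):
    #
    #   __state->grid_dims[0] = 2;
    #   __state->grid_dims[1] = 2;
    #   int grid_periods[2] = {0};
    #   MPI_Cart_create(MPI_COMM_WORLD, 2, __state->grid_dims, grid_periods,
    #                   0, &__state->grid_comm);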

    def exit_code(self):
        """ Outputs MPI deallocation code for the process-grid. """
        return f"""