Example 1
class Pipeline(Map):
    """ This a convenience-subclass of Map that allows easier implementation of
        loop nests (using regular Map indices) that need a constant-sized
        initialization and drain phase (e.g., N*M + c iterations), which would
        otherwise need a flattened one-dimensional map.
    """
    init_size = SymbolicProperty(default=0,
                                 desc="Number of initialization iterations.")
    init_overlap = Property(
        dtype=bool,
        default=True,
        desc="Whether to increment regular map indices during initialization.")
    drain_size = SymbolicProperty(default=1,
                                  desc="Number of drain iterations.")
    drain_overlap = Property(
        dtype=bool,
        default=True,
        desc="Whether to increment regular map indices during pipeline drain.")

    def __init__(self,
                 *args,
                 init_size=0,
                 init_overlap=False,
                 drain_size=0,
                 drain_overlap=False,
                 **kwargs):
        super(Pipeline, self).__init__(*args, **kwargs)
        self.init_size = init_size
        self.init_overlap = init_overlap
        self.drain_size = drain_size
        self.drain_overlap = drain_overlap

    def iterator_str(self):
        return "__" + "".join(self.params)

    def loop_bound_str(self):
        from dace.codegen.targets.common import sym2cpp
        bound = 1
        for begin, end, step in self.range:
            bound *= (step + end - begin) // step
        # Add init and drain phases when relevant
        add_str = (" + " + sym2cpp(self.init_size)
                   if self.init_size != 0 and not self.init_overlap else "")
        add_str += (" + " + sym2cpp(self.drain_size)
                    if self.drain_size != 0 and not self.drain_overlap else "")
        return sym2cpp(bound) + add_str

    def init_condition(self):
        """Variable that can be checked to see if pipeline is currently in
           initialization phase."""
        if self.init_size == 0:
            raise ValueError("No init condition exists for " + self.label)
        return self.iterator_str() + "_init"

    def drain_condition(self):
        """Variable that can be checked to see if pipeline is currently in
           draining phase."""
        if self.drain_size == 0:
            raise ValueError("No drain condition exists for " + self.label)
        return self.iterator_str() + "_drain"
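
The loop-bound arithmetic above can be checked in isolation. Below is a minimal plain-Python sketch (no DaCe required; the range and drain values are illustrative assumptions) of how loop_bound_str counts iterations per dimension and appends non-overlapping init/drain phases.

def pipeline_bound(ranges, init_size=0, init_overlap=False,
                   drain_size=0, drain_overlap=False):
    # Each (begin, end, step) entry has an inclusive end, so it contributes
    # (step + end - begin) // step iterations to the flattened bound.
    bound = 1
    for begin, end, step in ranges:
        bound *= (step + end - begin) // step
    # Init/drain iterations are only added when they do not overlap with the
    # regular map iterations.
    if init_size != 0 and not init_overlap:
        bound += init_size
    if drain_size != 0 and not drain_overlap:
        bound += drain_size
    return bound

# A 100-iteration dimension with a 4-iteration, non-overlapping drain phase:
assert pipeline_bound([(0, 99, 1)], drain_size=4) == 104
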
Example 2
class Send(nodes.LibraryNode):

    # Global properties
    implementations = {
        "NCCL": ExpandSendNCCL,
    }
    default_implementation = "NCCL"

    # Object fields
    peer = SymbolicProperty(default=0,
                            allow_none=True,
                            desc="The gpu on which the receive buffer resides")

    def __init__(self,
                 peer: symbolic.SymbolicType = 0,
                 debuginfo=None,
                 *args,
                 **kwargs):

        super().__init__(name='nccl_Send', *args, **kwargs)
        self.peer = peer
        self.schedule = dtypes.ScheduleType.GPU_Multidevice
        self.debuginfo = debuginfo

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Send(None)
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        return ret

    def __str__(self):
        return f'nccl_Send(peer={self.peer})'

    def validate(self, sdfg: SDFG, state: SDFGState):
        in_edges = state.in_edges(self)
        if len(in_edges) not in [1, 2]:
            raise ValueError("NCCL Send must have one or two inputs.")
        out_edges = state.out_edges(self)
        if len(out_edges) not in [0, 1]:
            raise ValueError("NCCL Send must have zero or one output.")

    @property
    def free_symbols(self) -> Set[str]:
        result = super().free_symbols
        result.update(map(str, self.peer.free_symbols))
        return result
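
A short, hedged sketch of how a symbolic peer feeds into free_symbols: the SymbolicProperty can hold a sympy expression, and the symbols it references (here a hypothetical rank) end up in the node's free-symbol set.

import sympy as sp

peer = sp.sympify('rank + 1')   # e.g., send to the next GPU in a ring
assert set(map(str, peer.free_symbols)) == {'rank'}
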
Example 3
    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.num_accesses != num_elements:
            if self.num_accesses == -1:
                result += '(dyn) '
            else:
                result += '(%s) ' % SymbolicProperty.to_string(
                    self.num_accesses)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't draw the array subset if we're accessing a single element at index zero
                if all(s == 0 for s in self.subset.min_element()):
                    arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == dtypes.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s' % wcrstr
            if self.wcr_identity is not None:
                result += ', id: %s' % str(self.wcr_identity)
            result += ')'

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result
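
The volume-prefix logic at the top of this method can be isolated as a small runnable sketch (values are illustrative): dynamic memlets get a '(dyn)' marker, a volume that differs from the subset size is printed explicitly, and a matching volume produces no prefix.

def volume_prefix(num_accesses, num_elements):
    if num_accesses == num_elements:
        return ''
    return '(dyn) ' if num_accesses == -1 else '(%s) ' % num_accesses

assert volume_prefix(-1, 10) == '(dyn) '
assert volume_prefix(3, 10) == '(3) '
assert volume_prefix(10, 10) == ''
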
Example 4
class Stream(Data):
    """ Stream (or stream array) data descriptor. """

    # Properties
    offset = ListProperty(element_type=symbolic.pystr_to_symbolic)
    buffer_size = SymbolicProperty(desc="Size of internal buffer.", default=0)

    def __init__(self,
                 dtype,
                 buffer_size,
                 shape=None,
                 transient=False,
                 storage=dtypes.StorageType.Default,
                 location=None,
                 offset=None,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 debuginfo=None):

        if shape is None:
            shape = (1, )

        self.buffer_size = buffer_size

        if offset is not None:
            if len(offset) != len(shape):
                raise TypeError('Offset must be the same size as shape')
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        super(Stream, self).__init__(dtype, shape, transient, storage, location,
                                     lifetime, debuginfo)

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls(dtypes.int8, 1)
        serialize.set_properties_from_json(ret, json_obj, context=context)

        # Check validity now
        ret.validate()
        return ret

    def __repr__(self):
        return '%s (dtype=%s, shape=%s)' % (type(self).__name__, self.dtype,
                                            self.shape)

    @property
    def total_size(self):
        return _prod(self.shape)

    @property
    def strides(self):
        return [_prod(self.shape[i + 1:]) for i in range(len(self.shape))]

    def clone(self):
        return type(self)(self.dtype, self.buffer_size, self.shape,
                          self.transient, self.storage, self.location,
                          self.offset, self.lifetime, self.debuginfo)

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, type(self)):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            if dim != otherdim:
                return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call: return name
        if self.storage in [
                dtypes.StorageType.GPU_Global, dtypes.StorageType.GPU_Shared
        ]:
            return 'dace::GPUStream<%s, %s> %s' % (str(
                self.dtype.ctype), 'true' if sp.log(
                    self.buffer_size, 2).is_Integer else 'false', name)

        return 'dace::Stream<%s> %s' % (str(self.dtype.ctype), name)

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]

    def size_string(self):
        return (" * ".join(
            [cppunparse.pyexpr2cpp(symbolic.symstr(s)) for s in self.shape]))

    def is_stream_array(self):
        return _prod(self.shape) != 1

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sp.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sp.Symbol(str(s), **s.assumptions0)
                else:
                    s = sp.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sp.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sp.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sp.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    @property
    def free_symbols(self):
        result = super().free_symbols
        if isinstance(self.buffer_size, sp.Expr):
            result |= set(self.buffer_size.free_symbols)
        for o in self.offset:
            if isinstance(o, sp.Expr):
                result |= set(o.free_symbols)

        return result
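
The strides and total_size properties follow standard row-major arithmetic. A minimal plain-Python sketch of that computation (the shape is an illustrative assumption):

from functools import reduce
import operator

def _prod(seq):
    return reduce(operator.mul, seq, 1)

shape = (4, 8, 16)
strides = [_prod(shape[i + 1:]) for i in range(len(shape))]
assert strides == [128, 16, 1]   # elements to skip per dimension
assert _prod(shape) == 512       # total number of stream elements
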
Example 5
class StripMining(transformation.Transformation):
    """ Implements the strip-mining transformation.

        Strip-mining takes as input a map dimension and splits it into
        two dimensions. The new dimension iterates over the range of
        the original one with a parameterizable step, called the tile
        size. The original dimension is changed to iterate over the
        range of the tile size, with the same step as before.
    """

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    # Properties
    dim_idx = Property(dtype=int,
                       default=-1,
                       desc="Index of dimension to be strip-mined")
    new_dim_prefix = Property(dtype=str,
                              default="tile",
                              desc="Prefix for new dimension name")
    tile_size = SymbolicProperty(
        default=64,
        desc="Tile size of strip-mined dimension, "
        "or number of tiles if tiling_type=number_of_tiles")
    tile_stride = SymbolicProperty(default=0,
                                   desc="Stride between two tiles of the "
                                   "strip-mined dimension. If zero, it is set "
                                   "equal to the tile size.")
    tile_offset = SymbolicProperty(default=0,
                                   desc="Tile stride offset (negative)")
    divides_evenly = Property(dtype=bool,
                              default=False,
                              desc="Tile size divides dimension range evenly?")
    strided = Property(
        dtype=bool,
        default=False,
        desc="Continuous (false) or strided (true) elements in tile")

    tiling_type = Property(
        dtype=str,
        default='normal',
        choices=['normal', 'ceilrange', 'number_of_tiles'],
        allow_none=True,
        desc="normal: the outerloop increments with tile_size, "
        "ceilrange: uses ceiling(N/tile_size) in outer range, "
        "number_of_tiles: tiles the map into the number of provided tiles, "
        "provide the number of tiles over tile_size")

    skew = Property(
        dtype=bool,
        default=False,
        desc="If True, offsets inner tile back such that it starts with zero")

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(StripMining._map_entry)
            # StripMining._tasklet, StripMining._map_exit)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[StripMining._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        # Strip-mine selected dimension.
        _, _, new_map = self._stripmine(sdfg, graph, self.subgraph)
        return new_map

    # def __init__(self, tag=True):
    def __init__(self, *args, **kwargs):
        self._entry = nodes.EntryNode()
        self._tasklet = nodes.Tasklet('_')
        self._exit = nodes.ExitNode()
        super().__init__(*args, **kwargs)
        # self.tag = tag

    @property
    def entry(self):
        return self._entry

    @property
    def exit(self):
        return self._exit

    @property
    def tasklet(self):
        return self._tasklet

    def print_match_pattern(self, candidate):
        gentry = candidate[self.entry]
        return str(gentry.map.params[-1])

    def _find_new_dim(self, sdfg: SDFG, state: SDFGState,
                      entry: nodes.MapEntry, prefix: str, target_dim: str):
        """ Finds a variable that is not already defined in scope. """
        stree = state.scope_tree()
        if len(prefix) == 0:
            return target_dim
        candidate = '%s_%s' % (prefix, target_dim)
        index = 1
        while candidate in map(str, stree[entry].defined_vars):
            candidate = '%s%d_%s' % (prefix, index, target_dim)
            index += 1
        return candidate

    def _create_strided_range(self, sdfg: SDFG, state: SDFGState,
                              map_entry: nodes.MapEntry):
        map_exit = state.exit_node(map_entry)
        dim_idx = self.dim_idx
        new_dim_prefix = self.new_dim_prefix
        tile_size = self.tile_size
        divides_evenly = self.divides_evenly
        tile_stride = self.tile_stride
        if tile_stride == 0:
            tile_stride = tile_size
        if tile_stride != tile_size:
            raise NotImplementedError

        # Retrieve parameter and range of dimension to be strip-mined.
        target_dim = map_entry.map.params[dim_idx]
        td_from, td_to, td_step = map_entry.map.range[dim_idx]
        new_dim = self._find_new_dim(sdfg, state, map_entry, new_dim_prefix,
                                     target_dim)
        new_dim_range = (td_from, td_to, tile_size)
        new_map = nodes.Map(map_entry.map.label, [new_dim],
                            subsets.Range([new_dim_range]))

        dimsym = dace.symbolic.pystr_to_symbolic(new_dim)
        td_from_new = dimsym
        if divides_evenly:
            td_to_new = dimsym + tile_size - 1
        else:
            if isinstance(td_to, dace.symbolic.SymExpr):
                td_to = td_to.expr
            td_to_new = dace.symbolic.SymExpr(
                sympy.Min(dimsym + tile_size - 1, td_to),
                dimsym + tile_size - 1)
        td_step_new = td_step

        return new_dim, new_map, (td_from_new, td_to_new, td_step_new)

    def _create_ceil_range(self, sdfg: SDFG, graph: SDFGState,
                           map_entry: nodes.MapEntry):
        map_exit = graph.exit_node(map_entry)

        # Retrieve transformation properties.
        dim_idx = self.dim_idx
        new_dim_prefix = self.new_dim_prefix
        tile_size = self.tile_size
        divides_evenly = self.divides_evenly
        strided = self.strided
        offset = self.tile_offset

        tile_stride = self.tile_stride
        if tile_stride == 0:
            tile_stride = tile_size

        # Retrieve parameter and range of dimension to be strip-mined.
        target_dim = map_entry.map.params[dim_idx]
        td_from, td_to, td_step = map_entry.map.range[dim_idx]
        # Create new map. Replace by cloning map object?
        new_dim = self._find_new_dim(sdfg, graph, map_entry, new_dim_prefix,
                                     target_dim)
        nd_from = 0
        if tile_stride == 1:
            nd_to = td_to - td_from
        else:
            nd_to = symbolic.pystr_to_symbolic(
                'int_ceil(%s + 1 - %s, %s) - 1' %
                (symbolic.symstr(td_to), symbolic.symstr(td_from),
                 symbolic.symstr(tile_stride)))
        nd_step = 1
        new_dim_range = (nd_from, nd_to, nd_step)
        new_map = nodes.Map(new_dim + '_' + map_entry.map.label, [new_dim],
                            subsets.Range([new_dim_range]))

        # Change the range of the selected dimension to iterate over a single
        # tile
        if strided:
            td_from_new = symbolic.pystr_to_symbolic(new_dim)
            td_to_new_approx = td_to
            td_step = tile_size

        elif offset == 0:
            td_from_new = symbolic.pystr_to_symbolic(
                '%s + %s * %s' %
                (symbolic.symstr(td_from), symbolic.symstr(new_dim),
                 symbolic.symstr(tile_stride)))
            td_to_new_exact = symbolic.pystr_to_symbolic(
                'min(%s + 1, %s + %s * %s + %s) - 1' %
                (symbolic.symstr(td_to), symbolic.symstr(td_from),
                 symbolic.symstr(tile_stride), symbolic.symstr(new_dim),
                 symbolic.symstr(tile_size)))
            td_to_new_approx = symbolic.pystr_to_symbolic(
                '%s + %s * %s + %s - 1' %
                (symbolic.symstr(td_from), symbolic.symstr(tile_stride),
                 symbolic.symstr(new_dim), symbolic.symstr(tile_size)))

        else:
            # include offset
            td_from_new_exact = symbolic.pystr_to_symbolic(
                'max(%s,%s + %s * %s - %s)' %
                (symbolic.symstr(td_from), symbolic.symstr(td_from),
                 symbolic.symstr(tile_stride), symbolic.symstr(new_dim),
                 symbolic.symstr(offset)))
            td_from_new_approx = symbolic.pystr_to_symbolic(
                '%s + %s * %s - %s ' %
                (symbolic.symstr(td_from), symbolic.symstr(tile_stride),
                 symbolic.symstr(new_dim), symbolic.symstr(offset)))
            td_from_new = dace.symbolic.SymExpr(td_from_new_exact,
                                                td_from_new_approx)

            td_to_new_exact = symbolic.pystr_to_symbolic(
                'min(%s + 1, %s + %s * %s + %s - %s) - 1' %
                (symbolic.symstr(td_to), symbolic.symstr(td_from),
                 symbolic.symstr(tile_stride), symbolic.symstr(new_dim),
                 symbolic.symstr(tile_size), symbolic.symstr(offset)))
            td_to_new_approx = symbolic.pystr_to_symbolic(
                '%s + %s * %s + %s - %s - 1' %
                (symbolic.symstr(td_from), symbolic.symstr(tile_stride),
                 symbolic.symstr(new_dim), symbolic.symstr(tile_size),
                 symbolic.symstr(offset)))

        if divides_evenly or strided:
            td_to_new = td_to_new_approx
        else:
            td_to_new = dace.symbolic.SymExpr(td_to_new_exact,
                                              td_to_new_approx)
        return new_dim, new_map, (td_from_new, td_to_new, td_step)

    def _create_from_tile_numbers(self, sdfg: SDFG, state: SDFGState,
                                  map_entry: nodes.MapEntry):
        map_exit = state.exit_node(map_entry)

        # Retrieve transformation properties.
        dim_idx = self.dim_idx
        new_dim_prefix = self.new_dim_prefix
        divides_evenly = self.divides_evenly
        number_of_tiles = self.tile_size
        tile_stride = self.tile_stride

        number_of_tiles = dace.symbolic.pystr_to_symbolic(number_of_tiles)

        # Retrieve parameter and range of dimension to be strip-mined.
        target_dim = map_entry.map.params[dim_idx]
        td_from, td_to, td_step = map_entry.map.range[dim_idx]
        tile_size = map_entry.map.range.size_exact()[dim_idx] / number_of_tiles

        if tile_stride == 0:
            tile_stride = tile_size
        if tile_stride != tile_size:
            raise NotImplementedError

        new_dim = self._find_new_dim(sdfg, state, map_entry, new_dim_prefix,
                                     target_dim)
        new_dim_range = (td_from, number_of_tiles - 1, 1)
        new_map = nodes.Map(map_entry.map.label, [new_dim],
                            subsets.Range([new_dim_range]))

        dimsym = dace.symbolic.pystr_to_symbolic(new_dim)
        td_from_new = dimsym * tile_size
        if divides_evenly:
            td_to_new = (dimsym + 1) * tile_size - 1
        else:
            if isinstance(td_to, dace.symbolic.SymExpr):
                td_to = td_to.expr
            td_to_new = dace.symbolic.SymExpr(
                sympy.Min((dimsym + 1) * tile_size - 1, td_to),
                (dimsym + 1) * tile_size - 1)
        td_step_new = td_step
        return new_dim, new_map, (td_from_new, td_to_new, td_step_new)

    def _stripmine(self, sdfg, graph, candidate):
        # Retrieve map entry and exit nodes.
        map_entry = graph.nodes()[candidate[StripMining._map_entry]]
        map_exit = graph.exit_node(map_entry)

        # Retrieve transformation properties.
        dim_idx = self.dim_idx
        target_dim = map_entry.map.params[dim_idx]

        if self.tiling_type == 'ceilrange':
            new_dim, new_map, td_rng = self._create_ceil_range(
                sdfg, graph, map_entry)
        elif self.tiling_type == 'number_of_tiles':
            new_dim, new_map, td_rng = self._create_from_tile_numbers(
                sdfg, graph, map_entry)
        else:
            new_dim, new_map, td_rng = self._create_strided_range(
                sdfg, graph, map_entry)

        new_map_entry = nodes.MapEntry(new_map)
        new_map_exit = nodes.MapExit(new_map)

        td_to_new_approx = td_rng[1]
        if isinstance(td_to_new_approx, dace.symbolic.SymExpr):
            td_to_new_approx = td_to_new_approx.approx

        # Special case: if the new range is a single iteration and no prefix
        # was specified, remove the dimension entirely
        if td_rng[0] == td_to_new_approx and target_dim == new_dim:
            map_entry.map.range = subsets.Range(
                [r for i, r in enumerate(map_entry.map.range) if i != dim_idx])
            map_entry.map.params = [
                p for i, p in enumerate(map_entry.map.params) if i != dim_idx
            ]
            if len(map_entry.map.params) == 0:
                raise ValueError('Strip-mining all dimensions of the map with '
                                 'empty tiles is disallowed')
        else:
            map_entry.map.range[dim_idx] = td_rng

        # Make the internal map's schedule sequential ("not parallel")
        new_map.schedule = map_entry.map.schedule
        map_entry.map.schedule = dtypes.ScheduleType.Sequential

        # Redirect edges
        new_map_entry.in_connectors = dcpy(map_entry.in_connectors)
        sdutil.change_edge_dest(graph, map_entry, new_map_entry)
        new_map_exit.out_connectors = dcpy(map_exit.out_connectors)
        sdutil.change_edge_src(graph, map_exit, new_map_exit)

        # Create new entry edges
        new_in_edges = dict()
        entry_in_conn = {}
        entry_out_conn = {}
        for _src, src_conn, _dst, _, memlet in graph.out_edges(map_entry):
            if (src_conn is not None
                    and src_conn[:4] == 'OUT_' and not isinstance(
                        sdfg.arrays[memlet.data], dace.data.Scalar)):
                new_subset = calc_set_image(
                    map_entry.map.params,
                    map_entry.map.range,
                    memlet.subset,
                )
                conn = src_conn[4:]
                key = (memlet.data, 'IN_' + conn, 'OUT_' + conn)
                if key in new_in_edges.keys():
                    old_subset = new_in_edges[key].subset
                    new_in_edges[key].subset = calc_set_union(
                        old_subset, new_subset)
                else:
                    entry_in_conn['IN_' + conn] = None
                    entry_out_conn['OUT_' + conn] = None
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    if memlet.dynamic:
                        new_memlet.num_accesses = memlet.num_accesses
                    else:
                        new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges[key] = new_memlet
            else:
                if src_conn is not None and src_conn[:4] == 'OUT_':
                    conn = src_conn[4:]
                    in_conn = 'IN_' + conn
                    out_conn = 'OUT_' + conn
                else:
                    in_conn = src_conn
                    out_conn = src_conn
                if in_conn:
                    entry_in_conn[in_conn] = None
                if out_conn:
                    entry_out_conn[out_conn] = None
                new_in_edges[(memlet.data, in_conn, out_conn)] = dcpy(memlet)
        new_map_entry.out_connectors = entry_out_conn
        map_entry.in_connectors = entry_in_conn
        for (_, in_conn, out_conn), memlet in new_in_edges.items():
            graph.add_edge(new_map_entry, out_conn, map_entry, in_conn, memlet)

        # Create new exit edges
        new_out_edges = dict()
        exit_in_conn = {}
        exit_out_conn = {}
        for _src, _, _dst, dst_conn, memlet in graph.in_edges(map_exit):
            if (dst_conn is not None
                    and dst_conn[:3] == 'IN_' and not isinstance(
                        sdfg.arrays[memlet.data], dace.data.Scalar)):
                new_subset = calc_set_image(
                    map_entry.map.params,
                    map_entry.map.range,
                    memlet.subset,
                )
                conn = dst_conn[3:]
                key = (memlet.data, 'IN_' + conn, 'OUT_' + conn)
                if key in new_out_edges.keys():
                    old_subset = new_out_edges[key].subset
                    new_out_edges[key].subset = calc_set_union(
                        old_subset, new_subset)
                else:
                    exit_in_conn['IN_' + conn] = None
                    exit_out_conn['OUT_' + conn] = None
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    if memlet.dynamic:
                        new_memlet.num_accesses = memlet.num_accesses
                    else:
                        new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges[key] = new_memlet
            else:
                if dst_conn is not None and dst_conn[:3] == 'IN_':
                    conn = dst_conn[3:]
                    in_conn = 'IN_' + conn
                    out_conn = 'OUT_' + conn
                else:
                    in_conn = dst_conn
                    out_conn = dst_conn
                if in_conn:
                    exit_in_conn[in_conn] = None
                if out_conn:
                    exit_out_conn[out_conn] = None
                new_out_edges[(memlet.data, in_conn, out_conn)] = dcpy(memlet)
        new_map_exit.in_connectors = exit_in_conn
        map_exit.out_connectors = exit_out_conn
        for (_, in_conn, out_conn), memlet in new_out_edges.items():
            graph.add_edge(map_exit, out_conn, new_map_exit, in_conn, memlet)

        # Skew if necessary
        if self.skew:
            xfh.offset_map(sdfg, graph, map_entry, dim_idx, td_rng[0])

        # Return strip-mined dimension.
        return target_dim, new_dim, new_map
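
For the default (strided-range) tiling, the effect on a single dimension can be illustrated in plain Python (N and tile_size are assumed values; upper bounds are inclusive, as in DaCe ranges): the new outer dimension steps by tile_size, and the original dimension covers one tile, clipped at the original upper bound unless divides_evenly is set.

N, tile_size = 100, 64
outer = list(range(0, N, tile_size))                         # strip-mined dimension
inner = [(t, min(t + tile_size - 1, N - 1)) for t in outer]  # per-tile inclusive bounds
assert outer == [0, 64]
assert inner == [(0, 63), (64, 99)]
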
Example 6
class Array(Data):
    """ Array/constant descriptor (dimensions, type and other properties). """

    # Properties
    allow_conflicts = Property(
        dtype=bool,
        default=False,
        desc='If enabled, allows more than one '
        'memlet to write to the same memory location without conflict '
        'resolution.')

    strides = ShapeProperty(
        # element_type=symbolic.pystr_to_symbolic,
        desc='For each dimension, the number of elements to '
        'skip in order to obtain the next element in '
        'that dimension.')

    total_size = SymbolicProperty(
        default=1,
        desc='The total allocated size of the array. Can be used for'
        ' padding.')

    offset = ListProperty(element_type=symbolic.pystr_to_symbolic,
                          desc='Initial offset to translate all indices by.')

    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in '
                         'the same function')

    alignment = Property(dtype=int,
                         default=0,
                         desc='Allocation alignment in bytes (0 uses '
                         'compiler-default)')

    def __init__(self,
                 dtype,
                 shape,
                 transient=False,
                 allow_conflicts=False,
                 storage=dtypes.StorageType.Default,
                 location=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 alignment=0,
                 debuginfo=None,
                 total_size=None):

        super(Array, self).__init__(dtype, shape, transient, storage, location,
                                    lifetime, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.may_alias = may_alias
        self.alignment = alignment

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = [_prod(shape[i + 1:]) for i in range(len(shape))]

        self.total_size = total_size or _prod(shape)

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return '%s (dtype=%s, shape=%s)' % (type(self).__name__, self.dtype,
                                            self.shape)

    def clone(self):
        return type(self)(self.dtype, self.shape, self.transient,
                          self.allow_conflicts, self.storage, self.location,
                          self.strides, self.offset, self.may_alias,
                          self.lifetime, self.alignment, self.debuginfo,
                          self.total_size)

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        # Take care of symbolic expressions
        attrs['strides'] = list(map(str, attrs['strides']))

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls(dtypes.int8, ())
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # TODO: This needs to be reworked (i.e. integrated into the list property)
        ret.strides = list(map(symbolic.pystr_to_symbolic, ret.strides))

        # Check validity now
        ret.validate()
        return ret

    def validate(self):
        super(Array, self).validate()
        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer '
                            'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sp.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sp.Symbol(str(s), **s.assumptions0)
                else:
                    s = sp.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sp.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sp.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sp.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, type(self)):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # Any other case (constant vs. constant), check for equality
            if otherdim != dim:
                return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        arrname = name

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]

    @property
    def free_symbols(self):
        result = super().free_symbols
        for s in self.strides:
            if isinstance(s, sp.Expr):
                result |= set(s.free_symbols)
        if isinstance(self.total_size, sp.Expr):
            result |= set(self.total_size.free_symbols)
        for o in self.offset:
            if isinstance(o, sp.Expr):
                result |= set(o.free_symbols)

        return result
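
A hedged usage sketch of the descriptor above (assuming dace is importable and these are the classes shown): default row-major strides are derived from the shape, total_size may exceed the shape product to model padding, and as_arg emits the C argument declaration.

from dace import data, dtypes

desc = data.Array(dtypes.float32, (64, 64), total_size=64 * 72)  # padded rows
assert list(desc.strides) == [64, 1]
assert desc.as_arg(name='A') == 'float * __restrict__ A'
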
Example 7
class Memlet(object):
    """ Data movement object. Represents the data, the subset moved, and the
        manner it is reindexed (`other_subset`) into the destination.
        If there are multiple conflicting writes, this object also specifies
        how they are resolved with a lambda function.
    """

    # Properties
    veclen = Property(dtype=int, desc="Vector length")
    num_accesses = SymbolicProperty()
    subset = SubsetProperty()
    other_subset = SubsetProperty(allow_none=True)
    data = DataProperty()
    debuginfo = DebugInfoProperty()
    wcr = LambdaProperty(allow_none=True)
    wcr_identity = Property(dtype=object, default=None, allow_none=True)
    wcr_conflict = Property(dtype=bool, default=True)
    allow_oob = Property(dtype=bool,
                         default=False,
                         desc='Bypass out-of-bounds validation')

    def __init__(self,
                 data,
                 num_accesses,
                 subset,
                 vector_length,
                 wcr=None,
                 wcr_identity=None,
                 other_subset=None,
                 debuginfo=None,
                 wcr_conflict=True):
        """ Constructs a Memlet.
            @param data: The data object or name to access. B{Note:} this
                         parameter will soon be deprecated.
            @type data: Either a string of the data descriptor name or an
                        AccessNode.
            @param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 `dace.types.DYNAMIC` (-1),
                                 designates that the number of accesses is
                                 unknown at compile time.
            @param subset: The subset of `data` that is going to be accessed.
            @param vector_length: The length of a single unit of access to
                                  the data (used for vectorization 
                                  optimizations).
            @param wcr: A lambda function specifying how write-conflicts
                        are resolved. The lambda function receives two
                        arguments, the `current` value and the `new` value,
                        and returns the value after resolution. For example,
                        summation is `lambda cur, new: cur + new`.
            @param wcr_identity: Identity value used for the first write 
                                 conflict. B{Note:} this parameter will soon
                                 be deprecated.
            @param other_subset: The reindexing of `subset` on the other 
                                 connected data.
            @param debuginfo: Source-code information (e.g., line, file) 
                              used for debugging.
            @param wcr_conflict: If False, forces non-locked conflict 
                                 resolution when generating code. The default
                                 is to let the code generator infer this 
                                 information from the SDFG.
        """

        # Properties
        self.num_accesses = num_accesses  # type: sympy math
        self.subset = subset  # type: subsets.Subset
        self.veclen = vector_length  # type: int (in elements, default 1)
        if hasattr(data, 'data'):
            data = data.data
        self.data = data  # type: str

        # Annotates memlet with _how_ writing is performed in case of conflict
        self.wcr = wcr
        self.wcr_identity = wcr_identity
        self.wcr_conflict = wcr_conflict

        # The subset of the other endpoint we are copying from/to (note:
        # carries the dimensionality of the other endpoint too!)
        self.other_subset = other_subset

        self.debuginfo = debuginfo

    def toJSON(self, indent=0):
        json = " " * indent + "{\n"
        indent += 2
        json += " " * indent + "\"type\" : \"" + type(self).__name__ + "\",\n"
        json += " " * indent + "\"label\" : \"" + str(self) + "\"\n"
        indent -= 2
        json += " " * indent + "}\n"
        return json

    @staticmethod
    def simple(data,
               subset_str,
               veclen=1,
               wcr_str=None,
               wcr_identity=None,
               other_subset_str=None,
               wcr_conflict=True,
               num_accesses=None,
               debuginfo=None):
        """ Constructs a Memlet from string-based expressions.
            @param data: The data object or name to access. B{Note:} this
                         parameter will soon be deprecated.
            @type data: Either a string of the data descriptor name or an
                        AccessNode.
            @param subset_str: The subset of `data` that is going to 
                               be accessed in string format. Example: '0:N'.
            @param veclen: The length of a single unit of access to
                           the data (used for vectorization optimizations).
            @param wcr_str: A lambda function (as a string) specifying
                            how write-conflicts are resolved. The lambda
                            function receives two arguments, the `current`
                            value and the `new` value, and returns the value
                            after resolution. For example, summation is
                            `'lambda cur, new: cur + new'`.
            @param wcr_identity: Identity value used for the first write 
                                 conflict. B{Note:} this parameter will soon
                                 be deprecated.
            @param other_subset_str: The reindexing of `subset` on the other 
                                     connected data (as a string).
            @param wcr_conflict: If False, forces non-locked conflict 
                                 resolution when generating code. The default
                                 is to let the code generator infer this 
                                 information from the SDFG.
            @param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 `dace.types.DYNAMIC` (-1),
                                 designates that the number of accesses is
                                 unknown at compile time.
            @param debuginfo: Source-code information (e.g., line, file) 
                              used for debugging.
                                 
        """
        subset = SubsetProperty.from_string(subset_str)
        if num_accesses is not None:
            na = num_accesses
        else:
            na = subset.num_elements()

        if wcr_str is not None:
            wcr = LambdaProperty.from_string(wcr_str)
        else:
            wcr = None

        if other_subset_str is not None:
            other_subset = SubsetProperty.from_string(other_subset_str)
        else:
            other_subset = None

        # If it is an access node or another memlet
        if hasattr(data, 'data'):
            data = data.data

        return Memlet(data,
                      na,
                      subset,
                      veclen,
                      wcr=wcr,
                      wcr_identity=wcr_identity,
                      other_subset=other_subset,
                      wcr_conflict=wcr_conflict,
                      debuginfo=debuginfo)

    @staticmethod
    def from_array(dataname, datadesc):
        """ Constructs a Memlet that transfers an entire array's contents.
            @param dataname: The name of the data descriptor in the SDFG.
            @param datadesc: The data descriptor object.
            @type datadesc: Data.
        """
        range = subsets.Range.from_array(datadesc)
        return Memlet(dataname, range.num_elements(), range, 1)

    def __hash__(self):
        return hash((self.data, self.num_accesses, self.subset, self.veclen,
                     str(self.wcr), self.wcr_identity, self.other_subset))

    def __eq__(self, other):
        return all([
            self.data == other.data, self.num_accesses == other.num_accesses,
            self.subset == other.subset, self.veclen == other.veclen,
            self.wcr == other.wcr, self.wcr_identity == other.wcr_identity,
            self.other_subset == other.other_subset
        ])

    def num_elements(self):
        """ Returns the number of elements in the Memlet subset. """
        return self.subset.num_elements()

    def bounding_box_size(self):
        """ Returns a per-dimension upper bound on the maximum number of
            elements in each dimension.

            This bound will be tight in the case of Range.
        """
        return self.subset.bounding_box_size()

    def validate(self, sdfg, state):
        if self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    def __label__(self, sdfg, state):
        """ Returns a string representation of the memlet for display in a 
            graph.

            @param sdfg: The SDFG in which the memlet resides.
            @param state: An SDFGState object in which the memlet resides.
        """
        if self.data is None:
            return self._label(None)
        return self._label(sdfg.arrays[self.data].shape)

    def __str__(self):
        return self._label(None)

    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.num_accesses != num_elements:
            result += '(%s) ' % str(self.num_accesses)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't draw array if we're accessing a single element
                arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == types.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s' % wcrstr
            if self.wcr_identity is not None:
                result += ', id: %s' % str(self.wcr_identity)
            result += ')'

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result

    def __repr__(self):
        return "Memlet (" + self.__str__() + ")"
Example 8
class AccumulateTransient(transformation.Transformation):
    """ Implements the AccumulateTransient transformation, which adds
        transient stream and data nodes between nested maps that lead to a
        stream. The transient data nodes then act as a local accumulator.
    """

    map_exit = transformation.PatternNode(nodes.MapExit)
    outer_map_exit = transformation.PatternNode(nodes.MapExit)

    array_identity_dict = DictProperty(key_type=str,
                                       value_type=symbolic.pystr_to_symbolic,
                                       desc="dict with key: Array and"
                                       "value: the Identity value to set",
                                       default=dict(),
                                       allow_none=True)

    array = Property(
        dtype=str,
        desc="Array to create local storage for (if empty, first available)",
        default=None,
        allow_none=True)

    prefix = Property(dtype=str,
                      default="trans_",
                      allow_none=True,
                      desc='Prefix for new data node')

    identity = SymbolicProperty(desc="Identity value to set",
                                default=None,
                                allow_none=True)

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(AccumulateTransient.map_exit,
                                   AccumulateTransient.outer_map_exit)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        map_exit = graph.nodes()[candidate[AccumulateTransient.map_exit]]
        outer_map_exit = graph.nodes()[candidate[
            AccumulateTransient.outer_map_exit]]

        # Check if there is an accumulation output
        for e in graph.edges_between(map_exit, outer_map_exit):
            if e.data.wcr is not None:
                return True

        return False

    @staticmethod
    def match_to_str(graph, candidate):
        map_exit = candidate[AccumulateTransient.map_exit]
        outer_map_exit = candidate[AccumulateTransient.outer_map_exit]

        return ' -> '.join(str(node) for node in [map_exit, outer_map_exit])

    def apply(self, sdfg: SDFG):
        graph = sdfg.node(self.state_id)
        map_exit = graph.node(self.subgraph[AccumulateTransient.map_exit])
        outer_map_exit = graph.node(
            self.subgraph[AccumulateTransient.outer_map_exit])

        # Avoid import loop
        from dace.transformation.dataflow.local_storage import OutLocalStorage

        array_identity_dict = self.array_identity_dict

        # Choose array
        array = self.array
        if array is not None and len(array) != 0:
            array_identity_dict[array] = self.identity
        elif ((array is None or len(array) == 0)
              and len(array_identity_dict) == 0):
            array = next(e.data.data
                         for e in graph.edges_between(map_exit, outer_map_exit)
                         if e.data.wcr is not None)
            array_identity_dict[array] = self.identity

        transients: Dict[str, Any] = {}
        for array, identity in array_identity_dict.items():
            data_node: nodes.AccessNode = OutLocalStorage.apply_to(
                sdfg,
                dict(array=array, prefix=self.prefix),
                verify=False,
                save=False,
                node_a=map_exit,
                node_b=outer_map_exit)

            transients[data_node.data] = identity

            if identity is None:
                warnings.warn(
                    'AccumulateTransient did not properly initialize '
                    'newly-created transient!')
                return

        sdfg_state: SDFGState = sdfg.node(self.state_id)

        map_entry = sdfg_state.entry_node(map_exit)

        nested_sdfg: nodes.NestedSDFG = nest_state_subgraph(
            sdfg=sdfg,
            state=sdfg_state,
            subgraph=SubgraphView(
                sdfg_state, {map_entry, map_exit}
                | sdfg_state.all_nodes_between(map_entry, map_exit)))

        nested_sdfg_state: SDFGState = nested_sdfg.sdfg.nodes()[0]

        init_state = nested_sdfg.sdfg.add_state_before(nested_sdfg_state)

        for data_name, identity in transients.items():
            temp_array: Array = sdfg.arrays[data_name]

            init_state.add_mapped_tasklet(
                name='acctrans_init',
                map_ranges={
                    '_o%d' % i: '0:%s' % symbolic.symstr(d)
                    for i, d in enumerate(temp_array.shape)
                },
                inputs={},
                code='out = %s' % identity,
                outputs={
                    'out':
                    dace.Memlet.simple(
                        data=data_name,
                        subset_str=','.join([
                            '_o%d' % i for i, _ in enumerate(temp_array.shape)
                        ]))
                },
                external_edges=True)

        # TODO: use trivial map elimination here, once it is merged, to remove the map if it has trivial ranges

        return nested_sdfg
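
The map ranges and output subset of the acctrans_init tasklet are built purely from the transient's shape; a small runnable sketch of that string construction (the shape is an illustrative assumption):

shape = (16, 32)
map_ranges = {'_o%d' % i: '0:%s' % d for i, d in enumerate(shape)}
subset_str = ','.join('_o%d' % i for i, _ in enumerate(shape))
assert map_ranges == {'_o0': '0:16', '_o1': '0:32'}
assert subset_str == '_o0,_o1'
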
Example 9
class AccumulateTransient(transformation.SingleStateTransformation):
    """ Implements the AccumulateTransient transformation, which adds
        transient stream and data nodes between nested maps that lead to a
        stream. The transient data nodes then act as a local accumulator.
    """

    map_exit = transformation.PatternNode(nodes.MapExit)
    outer_map_exit = transformation.PatternNode(nodes.MapExit)

    array = Property(
        dtype=str,
        desc="Array to create local storage for (if empty, first available)",
        default=None,
        allow_none=True)

    identity = SymbolicProperty(desc="Identity value to set",
                                default=None,
                                allow_none=True)

    @classmethod
    def expressions(cls):
        return [sdutil.node_path_graph(cls.map_exit, cls.outer_map_exit)]

    def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
        map_exit = self.map_exit
        outer_map_exit = self.outer_map_exit

        # Check if there is an accumulation output
        for e in graph.edges_between(map_exit, outer_map_exit):
            if e.data.wcr is not None:
                return True

        return False

    def apply(self, graph: SDFGState, sdfg: SDFG):
        map_exit = self.map_exit
        outer_map_exit = self.outer_map_exit

        # Choose array
        array = self.array
        if array is None or len(array) == 0:
            array = next(e.data.data
                         for e in graph.edges_between(map_exit, outer_map_exit)
                         if e.data.wcr is not None)

        # Avoid import loop
        from dace.transformation.dataflow.local_storage import OutLocalStorage

        data_node: nodes.AccessNode = OutLocalStorage.apply_to(
            sdfg,
            dict(array=array),
            verify=False,
            save=False,
            node_a=map_exit,
            node_b=outer_map_exit)

        if self.identity is None:
            warnings.warn('AccumulateTransient did not properly initialize '
                          'newly-created transient!')
            return

        sdfg_state: SDFGState = sdfg.node(self.state_id)

        map_entry = sdfg_state.entry_node(map_exit)

        nested_sdfg: NestedSDFG = nest_state_subgraph(
            sdfg=sdfg,
            state=sdfg_state,
            subgraph=SubgraphView(
                sdfg_state, {map_entry, map_exit}
                | sdfg_state.all_nodes_between(map_entry, map_exit)))

        nested_sdfg_state: SDFGState = nested_sdfg.sdfg.nodes()[0]

        init_state = nested_sdfg.sdfg.add_state_before(nested_sdfg_state)

        temp_array: Array = sdfg.arrays[data_node.data]

        init_state.add_mapped_tasklet(
            name='acctrans_init',
            map_ranges={
                '_o%d' % i: '0:%s' % symstr(d)
                for i, d in enumerate(temp_array.shape)
            },
            inputs={},
            code='out = %s' % self.identity,
            outputs={
                'out':
                dace.Memlet.simple(data=data_node.data,
                                   subset_str=','.join([
                                       '_o%d' % i
                                       for i, _ in enumerate(temp_array.shape)
                                   ]))
            },
            external_edges=True)
Example 10
class GPUTransformMap(transformation.Transformation):
    """ Implements the GPUTransformMap transformation.

        Converts a single map to a GPU-scheduled map and creates GPU arrays
        outside it, generating CPU<->GPU memory copies automatically.
    """

    fullcopy = Property(desc="Copy whole arrays rather than used subset",
                        dtype=bool,
                        default=False)

    toplevel_trans = Property(desc="Make all GPU transients top-level",
                              dtype=bool,
                              default=False)

    register_trans = Property(
        desc="Make all transients inside GPU maps registers",
        dtype=bool,
        default=False)

    gpu_id = SymbolicProperty(default=None,
                              allow_none=True,
                              desc="Selects which gpu the map should run on")

    sequential_innermaps = Property(desc="Make all internal maps Sequential",
                                    dtype=bool,
                                    default=False)

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    import dace.libraries.standard as stdlib  # Avoid import loop
    _reduce = stdlib.Reduce('lambda: None', None)

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(GPUTransformMap._map_entry),
            sdutil.node_path_graph(GPUTransformMap._reduce)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        if expr_index == 0:
            map_entry = graph.nodes()[candidate[GPUTransformMap._map_entry]]
            candidate_map = map_entry.map

            # Map schedules that may not be transformed into GPU schedules
            if (candidate_map.schedule
                    in [dtypes.ScheduleType.MPI] + dtypes.GPU_SCHEDULES):
                return False
            if sd.is_devicelevel_gpu(sdfg, graph, map_entry):
                return False

            # Dynamic map ranges cannot become kernels
            if sd.has_dynamic_map_inputs(graph, map_entry):
                return False

            # Ensure that map does not include internal arrays that are
            # allocated on non-default space
            subgraph = graph.scope_subgraph(map_entry)
            for node in subgraph.nodes():
                if (isinstance(node, nodes.AccessNode) and
                        node.desc(sdfg).storage != dtypes.StorageType.Default
                        and
                        node.desc(sdfg).storage != dtypes.StorageType.Register):
                    return False

            # If one of the outputs is a stream, do not match
            map_exit = graph.exit_node(map_entry)
            for edge in graph.out_edges(map_exit):
                dst = graph.memlet_path(edge)[-1].dst
                if (isinstance(dst, nodes.AccessNode)
                        and isinstance(sdfg.arrays[dst.data], data.Stream)):
                    return False

            return True
        elif expr_index == 1:
            reduce = graph.nodes()[candidate[GPUTransformMap._reduce]]

            # Disallow GPU transformation if already in device-level code
            if sd.is_devicelevel_gpu(sdfg, graph, reduce):
                return False

            return True

    @staticmethod
    def match_to_str(graph, candidate):
        if GPUTransformMap._reduce in candidate:
            return str(graph.nodes()[candidate[GPUTransformMap._reduce]])
        else:
            return str(graph.nodes()[candidate[GPUTransformMap._map_entry]])

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        if self.expr_index == 0:
            map_entry = graph.nodes()[self.subgraph[GPUTransformMap._map_entry]]
            nsdfg_node = helpers.nest_state_subgraph(
                sdfg,
                graph,
                graph.scope_subgraph(map_entry),
                full_data=self.fullcopy)
        else:
            cnode = graph.nodes()[self.subgraph[GPUTransformMap._reduce]]
            nsdfg_node = helpers.nest_state_subgraph(sdfg,
                                                     graph,
                                                     SubgraphView(
                                                         graph, [cnode]),
                                                     full_data=self.fullcopy)

        # Avoiding import loops
        from dace.transformation.interstate import GPUTransformSDFG
        transformation = GPUTransformSDFG(0, 0, {}, 0)
        transformation.register_trans = self.register_trans
        transformation.sequential_innermaps = self.sequential_innermaps
        transformation.toplevel_trans = self.toplevel_trans
        transformation.gpu_id = self.gpu_id

        transformation.apply(nsdfg_node.sdfg)

        # Inline back as necessary
        sdfg.apply_strict_transformations()
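
As a rough illustration, the transformation above can be driven through the SDFG transformation API; the snippet below assumes an existing `sdfg` (for example, generated from a `@dace.program`) and uses option names taken from the properties defined in this class:

from dace.transformation.dataflow import GPUTransformMap

# Apply to every matching map or reduction; the call returns how many times
# the transformation was applied.
applied = sdfg.apply_transformations_repeated(
    GPUTransformMap,
    options=dict(toplevel_trans=True, sequential_innermaps=True))
print(f'GPUTransformMap applied {applied} time(s)')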
Example No. 11
class Reduce(nodes.LibraryNode):

    # Global properties
    implementations = {
        "NCCL": ExpandReduceNCCL,
    }
    default_implementation = "NCCL"

    # Object fields
    wcr = LambdaProperty(default='lambda a, b: a + b')
    root = SymbolicProperty(default=0,
                            allow_none=True,
                            desc="The GPU on which the reduced result "
                            "(receive buffer) resides")

    def __init__(self,
                 wcr="lambda a, b: a + b",
                 root: symbolic.SymbolicType = 0,
                 debuginfo=None,
                 *args,
                 **kwargs):

        super().__init__(name='nccl_Reduce', *args, **kwargs)
        self.wcr = wcr
        self.root = root
        self.schedule = dtypes.ScheduleType.GPU_Multidevice
        self.debuginfo = debuginfo

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Reduce("lambda a, b: a + b", None)
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        return ret

    def __str__(self):
        redtype = self.reduction_type

        wcr_str = str(redtype)
        wcr_str = wcr_str[wcr_str.find('.') + 1:]  # Skip "ReductionType."

        return f'nccl_Reduce({wcr_str})'

    @property
    def reduction_type(self):
        # Autodetect reduction type
        redtype = detect_reduction_type(self.wcr)
        if redtype not in nutil.NCCL_SUPPORTED_OPERATIONS:
            raise ValueError(
                'NCCL only supports sum, product, min and max operations.')
        return redtype

    def validate(self, sdfg: SDFG, state: SDFGState):
        redtype = self.reduction_type

        in_edges = state.in_edges(self)
        if len(in_edges) not in [1, 2]:
            raise ValueError("NCCL Reduce must have one or two inputs.")

        out_edges = state.out_edges(self)
        if len(out_edges) not in [1, 2]:
            raise ValueError("NCCL Reduce must have one or two outputs.")

    @property
    def free_symbols(self) -> Set[str]:
        result = super().free_symbols
        result.update(map(str, self.root.free_symbols))
        return result
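
A hypothetical sketch of placing this node in a multi-GPU state; `sdfg`, `state`, and the access nodes `inbuf`/`outbuf` are assumed to exist, and the connector names `_inbuffer`/`_outbuffer` are assumptions based on the NCCL expansion, which is not shown in this listing:

import dace

# Sum-reduce a GPU-resident buffer onto GPU 0 (illustrative only).
red = Reduce(wcr='lambda a, b: a + b', root=0)
state.add_node(red)
state.add_edge(inbuf, None, red, '_inbuffer',
               dace.Memlet.from_array(inbuf.data, inbuf.desc(sdfg)))
state.add_edge(red, '_outbuffer', outbuf, None,
               dace.Memlet.from_array(outbuf.data, outbuf.desc(sdfg)))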
Example No. 12
class Memlet(object):
    """ Data movement object. Represents the data, the subset moved, and the
        manner it is reindexed (`other_subset`) into the destination.
        If there are multiple conflicting writes, this object also specifies
        how they are resolved with a lambda function.
    """

    # Properties
    volume = SymbolicProperty(default=0,
                              desc='The exact number of elements moved '
                              'using this memlet, or the maximum number '
                              'if dynamic=True (with 0 as unbounded)')
    dynamic = Property(default=False,
                       dtype=bool,
                       desc='Is the number of elements moved determined at '
                       'runtime (e.g., data dependent)')
    subset = SubsetProperty(allow_none=True,
                            desc='Subset of elements to move from the data '
                            'attached to this edge.')
    other_subset = SubsetProperty(
        allow_none=True,
        desc='Subset of elements after reindexing to the data not attached '
        'to this edge (e.g., for offsets and reshaping).')
    data = DataProperty(desc='Data descriptor attached to this memlet')
    wcr = LambdaProperty(allow_none=True,
                         desc='If set, defines a write-conflict resolution '
                         'lambda function. The syntax of the lambda function '
                         'receives two elements: `current` value and `new` '
                         'value, and returns the value after resolution')

    # Code generation and validation hints
    debuginfo = DebugInfoProperty(desc='Line information to track source and '
                                  'generated code')
    wcr_nonatomic = Property(dtype=bool,
                             default=False,
                             desc='If True, always generates non-conflicting '
                             '(non-atomic) writes in resulting code')
    allow_oob = Property(dtype=bool,
                         default=False,
                         desc='Bypass out-of-bounds validation')

    def __init__(self,
                 expr: str = None,
                 data: str = None,
                 subset: Union[str, subsets.Subset] = None,
                 other_subset: Union[str, subsets.Subset] = None,
                 volume: Union[int, str, symbolic.SymbolicType] = None,
                 dynamic: bool = False,
                 wcr: Union[str, ast.AST] = None,
                 debuginfo: dtypes.DebugInfo = None,
                 wcr_nonatomic: bool = False,
                 allow_oob: bool = False):
        """ 
        Constructs a Memlet.
        :param expr: A string expression of this memlet, given as an ease
                     of use API. Must follow one of the following forms:
                     1. ``ARRAY``,
                     2. ``ARRAY[SUBSET]``,
                     3. ``ARRAY[SUBSET] -> OTHER_SUBSET``.
        :param data: (DEPRECATED) Data descriptor name attached to this memlet.
        :param subset: The subset to take from the data attached to the edge,
                       represented either as a string or a Subset object.
        :param other_subset: The subset to offset into the other side of the
                             memlet, represented either as a string or a Subset 
                             object.
        :param volume: The exact number of elements moved using this
                       memlet, or the maximum number of elements if
                       ``dynamic`` is set to True. If dynamic and this
                       value is set to zero, the number of elements moved
                       is runtime-defined and unbounded.
        :param dynamic: If True, the number of elements moved in this memlet
                        is defined dynamically at runtime.
        :param wcr: A lambda function (represented as a string or Python AST) 
                    specifying how write-conflicts are resolved. The syntax
                    of the lambda function receives two elements: ``current`` 
                    value and ``new`` value, and returns the value after
                    resolution. For example, summation is represented by
                    ``'lambda cur, new: cur + new'``.
        :param debuginfo: Line information from the generating source code.
        :param wcr_nonatomic: If True, overrides the automatic code generator 
                              decision and treats all write-conflict resolution
                              operations as non-atomic, which might cause race
                              conditions in the general case.
        :param allow_oob: If True, bypasses the checks in SDFG validation for
                          out-of-bounds accesses in memlet subsets.
        """

        # Will be set once memlet is added into an SDFG (in try_initialize)
        self._sdfg = None
        self._state = None
        self._edge = None

        # Field caching which subset belongs to source or destination of memlet
        self._is_data_src = None

        # Initialize first by string expression
        self.data = None
        self.subset = None
        self.other_subset = None
        if expr is not None:
            self._parse_memlet_from_str(expr)

        # Set properties
        self.data = self.data or data
        self.subset = self.subset or subset
        self.other_subset = self.other_subset or other_subset

        if volume is not None:
            self.volume = volume
        else:
            if self.subset is not None:
                self.volume = self.subset.num_elements()
            elif self.other_subset is not None:
                self.volume = self.other_subset.num_elements()
            else:
                self.volume = 1

        self.dynamic = dynamic
        self.wcr = wcr
        self.wcr_nonatomic = wcr_nonatomic
        self.debuginfo = debuginfo
        self.allow_oob = allow_oob

    def to_json(self):
        attrs = dace.serialize.all_properties_to_json(self)

        # Fill in new values
        if self.src_subset is not None:
            attrs['src_subset'] = self.src_subset.to_json()
        else:
            attrs['src_subset'] = None
        if self.dst_subset is not None:
            attrs['dst_subset'] = self.dst_subset.to_json()
        else:
            attrs['dst_subset'] = None

        # Fill in legacy (DEPRECATED) values for backwards compatibility
        attrs['num_accesses'] = \
            str(self.volume) if not self.dynamic else -1

        return {"type": "Memlet", "attributes": attrs}

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Memlet()
        dace.serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'src_subset', 'dst_subset', 'num_accesses'})
        if context:
            ret._sdfg = context['sdfg']
            ret._state = context['sdfg_state']
        return ret

    def __deepcopy__(self, memo):
        node = object.__new__(Memlet)

        # Set properties
        node._volume = dcpy(self._volume, memo=memo)
        node._dynamic = self._dynamic
        node._subset = dcpy(self._subset, memo=memo)
        node._other_subset = dcpy(self._other_subset, memo=memo)
        node._data = dcpy(self._data, memo=memo)
        node._wcr = dcpy(self._wcr, memo=memo)
        node._wcr_nonatomic = dcpy(self._wcr_nonatomic, memo=memo)
        node._debuginfo = dcpy(self._debuginfo, memo=memo)
        node._allow_oob = self._allow_oob
        node._is_data_src = self._is_data_src

        # Nullify graph references
        node._sdfg = None
        node._state = None
        node._edge = None

        return node

    def is_empty(self) -> bool:
        """ 
        Returns True if this memlet carries no data. Memlets without data are
        primarily used for connecting nodes to scopes without transferring 
        data to them. 
        """
        return (self.data is None and self.src_subset is None
                and self.dst_subset is None)

    @property
    def num_accesses(self):
        """ 
        Returns the total memory movement volume (in elements) of this memlet.
        """
        return self.volume

    @num_accesses.setter
    def num_accesses(self, value):
        self.volume = value

    @staticmethod
    def simple(data,
               subset_str,
               wcr_str=None,
               other_subset_str=None,
               wcr_conflict=True,
               num_accesses=None,
               debuginfo=None,
               dynamic=False):
        """ DEPRECATED: Constructs a Memlet from string-based expressions.
            :param data: The data object or name to access. 
            :type data: Either a string of the data descriptor name or an
                        AccessNode.
            :param subset_str: The subset of `data` that is going to
                               be accessed in string format. Example: '0:N'.
            :param wcr_str: A lambda function (as a string) specifying
                            how write-conflicts are resolved. The syntax
                            of the lambda function receives two elements:
                            `current` value and `new` value,
                            and returns the value after resolution. For
                            example, summation is
                            `'lambda cur, new: cur + new'`.
            :param other_subset_str: The reindexing of `subset` on the other
                                     connected data (as a string).
            :param wcr_conflict: If False, forces non-locked conflict
                                 resolution when generating code. The default
                                 is to let the code generator infer this
                                 information from the SDFG.
            :param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 -1, designates that the number of accesses is
                                 unknown at compile time.
            :param debuginfo: Source-code information (e.g., line, file)
                              used for debugging.
            :param dynamic: If True, the number of elements moved in this memlet
                            is defined dynamically at runtime.
        """
        # warnings.warn(
        #     'This function is deprecated, please use the Memlet '
        #     'constructor instead', DeprecationWarning)

        result = Memlet()

        if isinstance(subset_str, subsets.Subset):
            result.subset = subset_str
        else:
            result.subset = SubsetProperty.from_string(subset_str)

        result.dynamic = dynamic

        if num_accesses is not None:
            if num_accesses == -1:
                result.dynamic = True
                result.volume = 0
            else:
                result.volume = num_accesses
        else:
            result.volume = result._subset.num_elements()

        if wcr_str is not None:
            if isinstance(wcr_str, ast.AST):
                result.wcr = wcr_str
            else:
                result.wcr = LambdaProperty.from_string(wcr_str)

        if other_subset_str is not None:
            if isinstance(other_subset_str, subsets.Subset):
                result.other_subset = other_subset_str
            else:
                result.other_subset = SubsetProperty.from_string(
                    other_subset_str)
        else:
            result.other_subset = None

        # If it is an access node or another memlet
        if hasattr(data, 'data'):
            result.data = data.data
        else:
            result.data = data

        result.wcr_nonatomic = not wcr_conflict

        return result

    def _parse_from_subexpr(self, expr: str):
        if expr[-1] != ']':  # No subset given, try to use whole array
            if not dtypes.validate_name(expr):
                raise SyntaxError('Invalid memlet syntax "%s"' % expr)
            return expr, None

        # array[subset] syntax
        arrname, subset_str = expr[:-1].split('[')
        if not dtypes.validate_name(arrname):
            raise SyntaxError('Invalid array name "%s" in memlet' % arrname)
        return arrname, SubsetProperty.from_string(subset_str)

    def _parse_memlet_from_str(self, expr: str):
        """
        Parses a memlet and fills in either the src_subset/dst_subset fields
        or the _data/_subset fields.
        :param expr: A string expression of this memlet, given as an ease
                of use API. Must follow one of the following forms:
                1. ``ARRAY``,
                2. ``ARRAY[SUBSET]``,
                3. ``ARRAY[SUBSET] -> OTHER_SUBSET``.
                Note that modes 2 and 3 are deprecated and will leave 
                the memlet uninitialized until inserted into an SDFG.
        """
        expr = expr.strip()
        if '->' not in expr:  # Options 1 and 2
            self.data, self.subset = self._parse_from_subexpr(expr)
            return

        # Option 3
        src_expr, dst_expr = expr.split('->')
        src_expr = src_expr.strip()
        dst_expr = dst_expr.strip()
        if '[' not in src_expr and not dtypes.validate_name(src_expr):
            raise SyntaxError('Expression without data name not yet allowed')

        self.data, self.subset = self._parse_from_subexpr(src_expr)
        self.other_subset = SubsetProperty.from_string(dst_expr)

    def try_initialize(self, sdfg: 'dace.sdfg.SDFG',
                       state: 'dace.sdfg.SDFGState',
                       edge: 'dace.sdfg.graph.MultiConnectorEdge'):
        """ 
        Tries to initialize the internal fields of the memlet (e.g., src/dst 
        subset) once it is added to an SDFG as an edge.
        """
        from dace.sdfg.nodes import AccessNode, CodeNode  # Avoid import loops
        self._sdfg = sdfg
        self._state = state
        self._edge = edge

        # If memlet is code->code, ensure volume=1
        if (isinstance(edge.src, CodeNode) and isinstance(edge.dst, CodeNode)
                and self.volume == 0):
            self.volume = 1

        # Find source/destination of memlet
        try:
            path = state.memlet_path(edge)
        except (ValueError, AssertionError, StopIteration):
            # Cannot initialize yet
            return

        is_data_src = True
        if isinstance(path[-1].dst, AccessNode):
            if path[-1].dst.data == self._data:
                is_data_src = False
        self._is_data_src = is_data_src

        # If subset is None, fill in with entire array
        if (self.data is not None and self.subset is None):
            self.subset = subsets.Range.from_array(sdfg.arrays[self.data])

    def get_src_subset(self, edge: 'dace.sdfg.graph.MultiConnectorEdge',
                       state: 'dace.sdfg.SDFGState'):
        self.try_initialize(state.parent, state, edge)
        return self.src_subset

    def get_dst_subset(self, edge: 'dace.sdfg.graph.MultiConnectorEdge',
                       state: 'dace.sdfg.SDFGState'):
        self.try_initialize(state.parent, state, edge)
        return self.dst_subset

    @staticmethod
    def from_array(dataname, datadesc, wcr=None):
        """ Constructs a Memlet that transfers an entire array's contents.
            :param dataname: The name of the data descriptor in the SDFG.
            :param datadesc: The data descriptor object.
            :param wcr: The conflict resolution lambda.
            :type datadesc: Data
        """
        rng = subsets.Range.from_array(datadesc)
        return Memlet.simple(dataname, rng, wcr_str=wcr)

    def __hash__(self):
        return hash(
            (self.volume, self.src_subset, self.dst_subset, str(self.wcr)))

    def __eq__(self, other):
        return all([
            self.volume == other.volume, self.src_subset == other.src_subset,
            self.dst_subset == other.dst_subset, self.wcr == other.wcr
        ])

    def replace(self, repl_dict):
        """ Substitute a given set of symbols with a different set of symbols.
            :param repl_dict: A dict of string symbol names to symbols with
                              which to replace them.
        """
        repl_to_intermediate = {}
        repl_to_final = {}
        for symbol in repl_dict:
            if str(symbol) != str(repl_dict[symbol]):
                intermediate = symbolic.symbol('__dacesym_' + str(symbol))
                repl_to_intermediate[symbolic.symbol(symbol)] = intermediate
                repl_to_final[intermediate] = repl_dict[symbol]

        if len(repl_to_intermediate) > 0:
            if self.volume is not None and symbolic.issymbolic(self.volume):
                self.volume = self.volume.subs(repl_to_intermediate)
                self.volume = self.volume.subs(repl_to_final)
            if self.subset is not None:
                self.subset.replace(repl_to_intermediate)
                self.subset.replace(repl_to_final)
            if self.other_subset is not None:
                self.other_subset.replace(repl_to_intermediate)
                self.other_subset.replace(repl_to_final)

    def num_elements(self):
        """ Returns the number of elements in the Memlet subset. """
        if self.subset:
            return self.subset.num_elements()
        elif self.other_subset:
            return self.other_subset.num_elements()
        return 0

    def bounding_box_size(self):
        """ Returns a per-dimension upper bound on the maximum number of
            elements in each dimension.

            This bound will be tight in the case of Range.
        """
        if self.src_subset:
            return self.src_subset.bounding_box_size()
        elif self.dst_subset:
            return self.dst_subset.bounding_box_size()
        return []

    # New fields
    @property
    def src_subset(self):
        if self._is_data_src is not None:
            return self.subset if self._is_data_src else self.other_subset
        return self.subset

    @src_subset.setter
    def src_subset(self, new_src_subset):
        if self._is_data_src is not None:
            if self._is_data_src:
                self.subset = new_src_subset
            else:
                self.other_subset = new_src_subset
        else:
            self.subset = new_src_subset

    @property
    def dst_subset(self):
        if self._is_data_src is not None:
            return self.other_subset if self._is_data_src else self.subset
        return self.other_subset

    @dst_subset.setter
    def dst_subset(self, new_dst_subset):
        if self._is_data_src is not None:
            if self._is_data_src:
                self.other_subset = new_dst_subset
            else:
                self.subset = new_dst_subset
        else:
            self.other_subset = new_dst_subset

    def validate(self, sdfg, state):
        if self.data is not None and self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    @property
    def free_symbols(self) -> Set[str]:
        """ Returns a set of symbols used in this edge's properties. """
        # Symbolic properties are in volume, and the two subsets
        result = set()
        result |= set(map(str, self.volume.free_symbols))
        if self.src_subset:
            result |= self.src_subset.free_symbols
        if self.dst_subset:
            result |= self.dst_subset.free_symbols
        return result

    def __label__(self, sdfg, state):
        """ Returns a string representation of the memlet for display in a
            graph.

            :param sdfg: The SDFG in which the memlet resides.
            :param state: An SDFGState object in which the memlet resides.
        """
        if self.data is None:
            return self._label(None)
        return self._label(sdfg.arrays[self.data].shape)

    def __str__(self):
        return self._label(None)

    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.dynamic:
            result += '(dyn) '
        elif self.volume != num_elements:
            result += '(%s) ' % SymbolicProperty.to_string(self.volume)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't mention array if we're accessing a single element and it's zero
                if all(s == 0 for s in self.subset.min_element()):
                    arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == dtypes.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s)' % wcrstr

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result

    def __repr__(self):
        return "Memlet (" + self.__str__() + ")"
Example No. 13
File: ger.py Project: mfkiwl/dace
class Ger(LibraryNode):
    """
    Implements the BLAS operation GER, which computes alpha*x*y^T + A, i.e.,
    the outer product of the vectors x and y, scaled by alpha and added to
    the matrix A.

    The node expects input connectors "_x", "_y", and "_A", and output
    connector "_res".
    """

    # Global properties
    implementations = {"pure": ExpandGerPure, "FPGA": ExpandGerFpga}
    default_implementation = None

    # Object fields
    n_tile_size = dace.properties.SymbolicProperty(allow_none=False, default=1)
    m_tile_size = dace.properties.SymbolicProperty(allow_none=False, default=1)

    n = dace.properties.SymbolicProperty(allow_none=False, default=dace.symbolic.symbol("n"))
    m = dace.properties.SymbolicProperty(allow_none=False, default=dace.symbolic.symbol("m"))

    alpha = SymbolicProperty(
        default=1, desc="A scalar which will be multiplied with the outer product x*yT before adding matrix A")

    def __init__(self, name, n=dace.symbolic.symbol("n"), m=dace.symbolic.symbol("m"), alpha=1, location=None):
        super().__init__(name, location=location, inputs={"_x", "_y", "_A"}, outputs={"_res"})

        self.n = n
        self.m = m

        self.alpha = alpha

    def compare(self, other):
        return (self.implementation == other.implementation
                and self.n_tile_size == other.n_tile_size
                and self.m_tile_size == other.m_tile_size)

    def validate(self, sdfg, state):

        desc_a = None
        desc_x = None
        desc_y = None
        size_a = None
        size_x = None
        size_y = None
        for _, _, _, dst_conn, memlet in state.in_edges(self):
            if dst_conn == "_A":
                subset = copy.deepcopy(memlet.subset)
                subset.squeeze()
                size_a = subset.size()
                desc_a = sdfg.arrays[memlet.data]
            if dst_conn == "_x":
                subset = copy.deepcopy(memlet.subset)
                subset.squeeze()
                size_x = subset.size()
                desc_x = sdfg.arrays[memlet.data]
            if dst_conn == "_y":
                subset = copy.deepcopy(memlet.subset)
                subset.squeeze()
                size_y = subset.size()
                desc_y = sdfg.arrays[memlet.data]

        if size_a is None or size_x is None:
            raise ValueError("Expected at least two inputs to Ger " "(matrix A and vector x)")

        if size_y is None:
            raise ValueError("Expected vector y as an input to Ger.")

        # The following checks don't work for streams
        if (not isinstance(desc_x, dt.Array) or not isinstance(desc_y, dt.Array) or not isinstance(desc_a, dt.Array)):
            return

        if len(size_a) != 2:
            raise ValueError("A must be a matrix")
        if len(size_x) != 1:
            raise ValueError("x must be a vector")
        if len(size_y) != 1:
            raise ValueError("y must be a vector")

        if size_a[0] != size_x[0] or size_a[1] != size_y[0]:
            raise ValueError("Input vectors x and y (outer product) must match with the matrix A dimensions.")

        out_edges = state.out_edges(self)
        if len(out_edges) != 1:
            raise ValueError("Expected exactly one output from ger rank 1 operation.")
        out_memlet = out_edges[0].data

        out_subset = copy.deepcopy(out_memlet.subset)
        out_subset.squeeze()
        size_out = out_subset.size()

        if (len(size_out) != 2 or size_out[0] != size_a[0] or size_out[1] != size_a[1]):
            raise ValueError("Output matrix must match input matrix a and outer product x*yT.")

        return desc_a, desc_x, desc_y
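
A hypothetical sketch of instantiating the node; the import path is an assumption (the node lives in DaCe's BLAS library), `state` is an existing SDFGState, and wiring the `_x`, `_y`, `_A`, and `_res` connectors with memlets is only indicated:

import dace
from dace.libraries.blas import Ger  # assumed import path

n, m = dace.symbol('n'), dace.symbol('m')
ger_node = Ger('ger', n=n, m=m, alpha=2)
ger_node.implementation = 'pure'     # or 'FPGA', per `implementations` above
state.add_node(ger_node)
# ...connect the "_x", "_y", "_A" inputs and the "_res" output with memlets.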
Example No. 14
class ProcessGrid(object):
    """
    Process-grids implement cartesian topologies similarly to cartesian communicators created with [MPI_Cart_create](https://www.mpich.org/static/docs/latest/www3/MPI_Cart_create.html)
    and [MPI_Cart_sub](https://www.mpich.org/static/docs/v3.2/www3/MPI_Cart_sub.html).

    The boolean property `is_subgrid` provides a switch between "parent" process-grids (equivalent to communicators
    created with `MPI_Cart_create`) and sub-grids (equivalent to communicators created with `MPI_Cart_sub`).
    
    If `is_subgrid` is false, a "parent" process-grid is created. The `shape` property is equivalent to the `dims`
    parameter of `MPI_Cart_create`. The other properties are ignored. All "parent" process-grids spawn out of
    `MPI_COMM_WORLD`, while their `periods` and `reorder` parameters are set to False.

    If `is_subgrid` is true, then the `parent_grid` is partitioned to lower-dimensional cartesian sub-grids (for more
    details, see the documentation of `MPI_Cart_sub`). The `parent_grid` property is equivalent to the `comm` parameter
    of `MPI_Cart_sub`. The `color` property corresponds to the `remain_dims` parameter of `MPI_Cart_sub`, i.e., the i-th
    entry specifies whether the i-th dimension is kept in the sub-grid or is dropped.
    
    The following properties store information used in the redistribution of data:

    The `exact_grid` property is either None or the rank of an MPI process in the `parent_grid`. If set, then out of
    all the sub-grids created, only the one that contains this rank is used for collective communication. The `root`
    property selects the root rank for purposes of collective communication (0 by default).
    """

    name = Property(dtype=str, desc="The process-grid's name.")
    is_subgrid = Property(
        dtype=bool,
        default=False,
        desc="If true, spawns sub-grids out of the parent process-grid.")
    shape = ShapeProperty(default=[], desc="The process-grid's shape.")
    parent_grid = Property(
        dtype=str,
        allow_none=True,
        default=None,
        desc="Name of the parent process-grid "
        "(mandatory if `is_subgrid` is true, otherwise ignored).")
    color = ListProperty(
        int,
        allow_none=True,
        default=None,
        desc=
        "The i-th entry specifies whether the i-th dimension is kept in the sub-grid or is "
        "dropped (mandatory if `is_subgrid` is true, otherwise ignored).")
    exact_grid = SymbolicProperty(
        allow_none=True,
        default=None,
        desc=
        "If set then, out of all the sub-grids created, only the one that contains the "
        "rank with id `exact_grid` will be utilized for collective communication "
        "(optional if `is_subgrid` is true, otherwise ignored).")
    root = SymbolicProperty(default=0,
                            desc="The root rank for collective communication.")

    def __init__(self,
                 name: str,
                 is_subgrid: bool,
                 shape: ShapeType = None,
                 parent_grid: str = None,
                 color: Sequence[Union[Integral, bool]] = None,
                 exact_grid: RankType = None,
                 root: RankType = 0):
        self.name = name
        self.is_subgrid = is_subgrid
        if is_subgrid:
            self.parent_grid = parent_grid.name
            self.color = color
            self.exact_grid = exact_grid
            self.shape = [
                parent_grid.shape[i] for i, remain in enumerate(color)
                if remain
            ]
        else:
            self.shape = shape
        self.root = root
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if self.is_subgrid:
            if not self.parent_grid or len(self.parent_grid) == 0:
                raise ValueError(
                    'Sub-grid is missing its corresponding parent process-grid')
        if any(not isinstance(s, (Integral, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError(
                'Shape must be a list or tuple of integer values or symbols')
        if self.color and any(c < 0 or c > 1 for c in self.color):
            raise ValueError(
                'Color must have only logical true (1) or false (0) values.')
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)
        retdict = {"type": type(self).__name__, "attributes": attrs}
        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls('tmp', False, [])
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # Check validity now
        ret.validate()
        return ret

    def init_code(self):
        """ Outputs MPI allocation/initialization code for the process-grid.
            It is assumed that the following variables exist in the SDFG program's state:
            - MPI_Comm {self.name}_comm
            - MPI_Group {self.name}_group
            - int {self.name}_rank
            - int {self.name}_size
            - int* {self.name}_dims
            - int* {self.name}_remain
            - int* {self.name}_coords
            - bool {self.name}_valid

            These variables are typically added to the program's state through a Tasklet, e.g., the Dummy MPI node (for
            more details, check the DaCe MPI library in `dace/libraries/mpi`).

        """
        if self.is_subgrid:
            tmp = ""
            for i, s in enumerate(self.shape):
                tmp += f"__state->{self.name}_dims[{i}] = {s};\n"
            tmp += f"""
                __state->{self.name}_valid = false;
                if (__state->{self.parent_grid}_valid) {{
                    int {self.name}_remain[{len(self.color)}] = {{{', '.join(['1' if c else '0' for c in self.color])}}};
                    MPI_Cart_sub(__state->{self.parent_grid}_comm, {self.name}_remain, &__state->{self.name}_comm);
                    MPI_Comm_group(__state->{self.name}_comm, &__state->{self.name}_group);
                    MPI_Comm_rank(__state->{self.name}_comm, &__state->{self.name}_rank);
                    MPI_Comm_size(__state->{self.name}_comm, &__state->{self.name}_size);
                    MPI_Cart_coords(__state->{self.name}_comm, __state->{self.name}_rank, {len(self.shape)}, __state->{self.name}_coords);
            """
            if self.exact_grid is not None:
                tmp += f"""
                    int ranks1[1] = {{{self.exact_grid}}};
                    int ranks2[1];
                    MPI_Group_translate_ranks(__state->{self.parent_grid}_group, 1, ranks1, __state->{self.name}_group, ranks2);
                    __state->{self.name}_valid = (ranks2[0] != MPI_PROC_NULL && ranks2[0] != MPI_UNDEFINED);
                }}
                """
            else:
                tmp += f"""
                    __state->{self.name}_valid = true;
                }}
                """
            return tmp
        else:
            tmp = ""
            for i, s in enumerate(self.shape):
                tmp += f"__state->{self.name}_dims[{i}] = {s};\n"
            tmp += f"""
                int {self.name}_periods[{len(self.shape)}] = {{0}};
                MPI_Cart_create(MPI_COMM_WORLD, {len(self.shape)}, __state->{self.name}_dims, {self.name}_periods, 0, &__state->{self.name}_comm);
                if (__state->{self.name}_comm != MPI_COMM_NULL) {{
                    MPI_Comm_group(__state->{self.name}_comm, &__state->{self.name}_group);
                    MPI_Comm_rank(__state->{self.name}_comm, &__state->{self.name}_rank);
                    MPI_Comm_size(__state->{self.name}_comm, &__state->{self.name}_size);
                    MPI_Cart_coords(__state->{self.name}_comm, __state->{self.name}_rank, {len(self.shape)}, __state->{self.name}_coords);
                    __state->{self.name}_valid = true;
                }} else {{
                    __state->{self.name}_group = MPI_GROUP_NULL;
                    __state->{self.name}_rank = MPI_PROC_NULL;
                    __state->{self.name}_size = 0;
                    __state->{self.name}_valid = false;
                }}
            """
            return tmp

    def exit_code(self):
        """ Outputs MPI deallocation code for the process-grid. """
        return f"""
Example No. 15
File: data.py Project: tbennun/dace
class Stream(Data):
    """ Stream (or stream array) data descriptor. """

    # Properties
    strides = ListProperty(element_type=symbolic.pystr_to_symbolic)
    offset = ListProperty(element_type=symbolic.pystr_to_symbolic)
    buffer_size = SymbolicProperty(desc="Size of internal buffer.")
    veclen = Property(dtype=int,
                      desc="Vector length. Memlets must adhere to this.")

    def __init__(self,
                 dtype,
                 veclen,
                 buffer_size,
                 shape=None,
                 transient=False,
                 storage=dace.dtypes.StorageType.Default,
                 location='',
                 strides=None,
                 offset=None,
                 toplevel=False,
                 debuginfo=None):

        if shape is None:
            shape = (1, )

        self.veclen = veclen
        self.buffer_size = buffer_size

        if strides is not None:
            if len(strides) != len(shape):
                raise TypeError('Strides must be the same size as shape')
            self.strides = cp.copy(strides)
        else:
            self.strides = cp.copy(list(shape))

        if offset is not None:
            if len(offset) != len(shape):
                raise TypeError('Offset must be the same size as shape')
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        super(Stream, self).__init__(dtype, shape, transient, storage,
                                     location, toplevel, debuginfo)

    def to_json(self):
        attrs = dace.serialize.all_properties_to_json(self)

        # Take care of symbolic expressions
        attrs['strides'] = list(map(str, attrs['strides']))

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @staticmethod
    def from_json(json_obj, context=None):
        if json_obj['type'] != "Stream":
            raise TypeError("Invalid data type")

        # Create dummy object
        ret = Stream(dace.dtypes.int8, 1, 1)
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        # TODO: FIXME:
        # Since the strides are a list-property (normal Property()),
        # loading from/to string (and, consequently, from/to json)
        # leads to validation errors (contains Strings/Integers, not sympy symbols).
        # To fix this, it needs a custom class
        # For now, this is a workaround:
        ret.strides = list(map(symbolic.pystr_to_symbolic, ret.strides))

        # Check validity now
        ret.validate()
        return ret

    def __repr__(self):
        return 'Stream (dtype=%s, shape=%s)' % (self.dtype, self.shape)

    def clone(self):
        return Stream(self.dtype, self.veclen, self.buffer_size, self.shape,
                      self.transient, self.storage, self.location,
                      self.strides, self.offset, self.toplevel, self.debuginfo)

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Stream):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # If both are symbols, ensure equality
            if symbolic.issymbolic(dim) and symbolic.issymbolic(otherdim):
                if dim != otherdim:
                    return False

            # If one is a symbol and the other is a constant
            # make sure they are equivalent
            elif symbolic.issymbolic(otherdim):
                if symbolic.eval(otherdim) != dim:
                    return False
            elif symbolic.issymbolic(dim):
                if symbolic.eval(dim) != otherdim:
                    return False
            else:
                # Any other case (constant vs. constant), check for equality
                if otherdim != dim:
                    return False
        return True

    def signature(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call: return name
        if self.storage in [
                dace.dtypes.StorageType.GPU_Global,
                dace.dtypes.StorageType.GPU_Shared,
                dace.dtypes.StorageType.GPU_Stack
        ]:
            return 'dace::GPUStream<%s, %s> %s' % (str(
                self.dtype.ctype), 'true' if sp.log(
                    self.buffer_size, 2).is_Integer else 'false', name)

        return 'dace::Stream<%s> %s' % (str(self.dtype.ctype), name)

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]

    def size_string(self):
        return (" * ".join([
            cppunparse.pyexpr2cpp(dace.symbolic.symstr(s))
            for s in self.strides
        ]))

    def is_stream_array(self):
        return functools.reduce(lambda a, b: a * b, self.strides) != 1

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sympy.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sympy.Symbol(str(s), **s.assumptions0)
                else:
                    s = sympy.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sympy.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sympy.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sympy.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True
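
For illustration, a stream descriptor can be constructed directly with the signature shown above (note that this listing reflects an older DaCe version in which the constructor still takes `veclen`):

import dace
from dace import data as dt

# A single float32 stream with a 32-element internal buffer.
pipe = dt.Stream(dace.float32, 1, 32, transient=True)
print(pipe)                     # e.g., Stream (dtype=float32, shape=(1,))
print(pipe.is_stream_array())   # False, since this is a single stream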
Example No. 16
class GPUMultiTransformMap(transformation.Transformation):
    """ Implements the GPUMultiTransformMap transformation.

        Tiles a single map into 2 maps. The outer map is of schedule 
        GPU_Multidevice and loops over the GPUs, while the inner map
        is a GPU-scheduled map. It also creates GPU transient arrays
        between the two maps.
    """
    _map_entry = nodes.MapEntry(nodes.Map("", [], []))
    dim_idx = Property(dtype=int,
                       default=-1,
                       desc="Index of dimension to be distributed.")

    new_dim_prefix = Property(dtype=str,
                              default="gpu",
                              allow_none=True,
                              desc="Prefix for new dimension name")

    new_transient_prefix = Property(dtype=str,
                                    default="gpu_multi",
                                    allow_none=True,
                                    desc="Prefix for the transient name")

    skip_scalar = Property(
        dtype=bool,
        default=True,
        allow_none=True,
        desc="If True: skips the scalar data nodes. "
        "If False: creates localstorage for scalar transients.")

    use_p2p = Property(
        dtype=bool,
        default=False,
        allow_none=True,
        desc="If True: uses peer-to-peer access if a data container is already "
        "located on a GPU. "
        "If False: creates transient localstorage for data located on GPU.")

    number_of_gpus = SymbolicProperty(
        default=None,
        allow_none=True,
        desc="Number of GPUs to divide the map onto. If not set, uses the "
        "value of the max_number_gpus entry in the DaCe config.")

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        return [sdutil.node_path_graph(GPUMultiTransformMap._map_entry)]

    @staticmethod
    def can_be_applied(graph: SDFGState,
                       candidate,
                       expr_index,
                       sdfg,
                       strict=False):
        map_entry = graph.nodes()[candidate[GPUMultiTransformMap._map_entry]]

        # Check if there is more than one GPU available:
        if (Config.get("compiler", "cuda", "max_number_gpus") < 2):
            return False

        # Dynamic map ranges not supported
        if has_dynamic_map_inputs(graph, map_entry):
            return False

        # Only accept maps with a default schedule
        schedule_whitelist = [dtypes.ScheduleType.Default]
        sdict = graph.scope_dict()
        parent = sdict[map_entry]
        while parent is not None:
            if parent.map.schedule not in schedule_whitelist:
                return False
            parent = sdict[parent]

        # Library nodes inside the scope are not supported
        scope_subgraph = graph.scope_subgraph(map_entry)
        for node in scope_subgraph.nodes():
            if isinstance(node, nodes.LibraryNode):
                return False

        # Custom reductions cannot use an accumulate transient, as the
        # reduction would have to be split between the incoming and outgoing
        # memlets of the transient. Accumulating directly (without a GPU-local
        # transient) only works for small data volumes.
        map_exit = graph.exit_node(map_entry)
        for edge in graph.out_edges(map_exit):
            if edge.data.wcr is not None and operations.detect_reduction_type(
                    edge.data.wcr) == dtypes.ReductionType.Custom:
                return False

        storage_whitelist = [
            dtypes.StorageType.Default,
            dtypes.StorageType.CPU_Pinned,
            dtypes.StorageType.CPU_Heap,
            dtypes.StorageType.GPU_Global,
        ]
        for node in graph.predecessors(map_entry):
            if not isinstance(node, nodes.AccessNode):
                return False
            if node.desc(graph).storage not in storage_whitelist:
                return False

        for node in graph.successors(map_exit):
            if not isinstance(node, nodes.AccessNode):
                return False
            if node.desc(graph).storage not in storage_whitelist:
                return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[GPUMultiTransformMap._map_entry]]

        return map_entry.map.label

    def apply(self, sdfg: SDFG) -> None:
        graph: SDFGState = sdfg.nodes()[self.state_id]

        inner_map_entry: nodes.MapEntry = graph.nodes()[self.subgraph[
            GPUMultiTransformMap._map_entry]]

        number_of_gpus = self.number_of_gpus
        ngpus = Config.get("compiler", "cuda", "max_number_gpus")
        if number_of_gpus is None:
            number_of_gpus = ngpus
        if number_of_gpus > ngpus:
            raise ValueError(
                'Requested more GPUs than specified in the DaCe config')

        # Avoiding import loops
        from dace.transformation.dataflow import (StripMining, InLocalStorage,
                                                  OutLocalStorage,
                                                  AccumulateTransient)

        # The user is responsible for providing a GPU-compliant
        # implementation of any library node inside the map.
        scope_subgraph = graph.scope_subgraph(inner_map_entry)
        for node in scope_subgraph.nodes():
            if isinstance(node, nodes.LibraryNode):
                warnings.warn(
                    'Node %s is a library node, make sure to manually set the '
                    'implementation to a GPU compliant specialization.' % node)

        # Tile map into number_of_gpus tiles
        outer_map: nodes.Map = StripMining.apply_to(
            sdfg,
            dict(dim_idx=-1,
                 new_dim_prefix=self.new_dim_prefix,
                 tile_size=number_of_gpus,
                 tiling_type=dtypes.TilingType.NumberOfTiles),
            _map_entry=inner_map_entry)

        outer_map_entry: nodes.MapEntry = graph.scope_dict()[inner_map_entry]
        inner_map_exit: nodes.MapExit = graph.exit_node(inner_map_entry)
        outer_map_exit: nodes.MapExit = graph.exit_node(outer_map_entry)

        # Change map schedules
        inner_map_entry.map.schedule = dtypes.ScheduleType.GPU_Device
        outer_map.schedule = dtypes.ScheduleType.GPU_Multidevice

        symbolic_gpu_id = outer_map.params[0]

        # Add the parameter of the outer map
        for node in graph.successors(inner_map_entry):
            if isinstance(node, nodes.NestedSDFG):
                map_syms = inner_map_entry.range.free_symbols
                for sym in map_syms:
                    symname = str(sym)
                    if symname not in node.symbol_mapping.keys():
                        node.symbol_mapping[symname] = sym
                        node.sdfg.symbols[symname] = graph.symbols_defined_at(
                            node)[symname]

        # Add transient Data leading to the inner map
        prefix = self.new_transient_prefix
        for node in graph.predecessors(outer_map_entry):
            # Only AccessNodes are relevant
            if (isinstance(node, nodes.AccessNode)
                    and not (self.skip_scalar
                             and isinstance(node.desc(sdfg), Scalar))):
                if self.use_p2p and node.desc(
                        sdfg).storage is dtypes.StorageType.GPU_Global:
                    continue

                in_data_node = InLocalStorage.apply_to(sdfg,
                                                       dict(array=node.data,
                                                            prefix=prefix),
                                                       verify=False,
                                                       save=False,
                                                       node_a=outer_map_entry,
                                                       node_b=inner_map_entry)
                in_data_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                in_data_node.desc(sdfg).storage = dtypes.StorageType.GPU_Global

        wcr_data: Dict[str, Any] = {}
        # Add transient Data leading to the outer map
        for edge in graph.in_edges(outer_map_exit):
            node = graph.memlet_path(edge)[-1].dst
            if isinstance(node, nodes.AccessNode):
                data_name = node.data
                # Transients with write-conflict resolution need to be
                # collected first as AccumulateTransient creates a nestedSDFG
                if edge.data.wcr is not None:
                    dtype = sdfg.arrays[data_name].dtype
                    redtype = operations.detect_reduction_type(edge.data.wcr)
                    # Custom reductions cannot have an accumulate transient,
                    # as the accumulation from the transient to the outer
                    # storage is not defined.
                    if redtype == dtypes.ReductionType.Custom:
                        warnings.warn(
                            'Using custom reductions in a GPUMultiTransformMap '
                            'only works for small data volumes; correctness is '
                            'not guaranteed for larger volumes.')
                        continue
                    identity = dtypes.reduction_identity(dtype, redtype)
                    wcr_data[data_name] = identity
                elif (not isinstance(node.desc(sdfg), Scalar)
                      or not self.skip_scalar):
                    if self.use_p2p and node.desc(
                            sdfg).storage is dtypes.StorageType.GPU_Global:
                        continue
                    # Transients without write-conflict resolution
                    if prefix + '_' + data_name in sdfg.arrays:
                        create_array = False
                    else:
                        create_array = True
                    out_data_node = OutLocalStorage.apply_to(
                        sdfg,
                        dict(array=data_name,
                             prefix=prefix,
                             create_array=create_array),
                        verify=False,
                        save=False,
                        node_a=inner_map_exit,
                        node_b=outer_map_exit)
                    out_data_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                    out_data_node.desc(
                        sdfg).storage = dtypes.StorageType.GPU_Global

        # Add Transients for write-conflict resolution
        if len(wcr_data) != 0:
            nsdfg = AccumulateTransient.apply_to(
                sdfg,
                options=dict(array_identity_dict=wcr_data, prefix=prefix),
                map_exit=inner_map_exit,
                outer_map_exit=outer_map_exit)
            nsdfg.schedule = dtypes.ScheduleType.GPU_Multidevice
            nsdfg.location['gpu'] = symbolic_gpu_id
            for transient_node in graph.successors(nsdfg):
                if isinstance(transient_node, nodes.AccessNode):
                    transient_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                    transient_node.desc(
                        sdfg).storage = dtypes.StorageType.GPU_Global
                    nsdfg.sdfg.arrays[
                        transient_node.label].location['gpu'] = symbolic_gpu_id
                    nsdfg.sdfg.arrays[
                        transient_node.
                        label].storage = dtypes.StorageType.GPU_Global
            infer_types.set_default_schedule_storage_types_and_location(
                nsdfg.sdfg, dtypes.ScheduleType.GPU_Multidevice,
                symbolic_gpu_id)

        # Remove the parameter of the outer_map from the sdfg symbols,
        # as it got added as a symbol in StripMining.
        if outer_map.params[0] in sdfg.free_symbols:
            sdfg.remove_symbol(outer_map.params[0])
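
A hedged usage sketch for the fragment above: it is the apply() method of a multi-GPU map transformation that strip-mines a top-level map across devices and inserts per-GPU GPU_Global transients. The class name GPUMultiTransformMap and its import path below are assumptions inferred from the code, not a confirmed API.

# Hedged sketch: applying a multi-GPU map transformation such as the one above.
# GPUMultiTransformMap and its import path are assumptions, not confirmed API.
import dace
from dace.transformation.dataflow import GPUMultiTransformMap  # assumed location

N = dace.symbol('N')


@dace.program
def axpy(a: dace.float64, x: dace.float64[N], y: dace.float64[N]):
    y[:] = a * x + y


sdfg = axpy.to_sdfg()
# Splits the map across GPUs and adds GPU_Global transient copies per device
applied = sdfg.apply_transformations(GPUMultiTransformMap)
print('Applied', applied, 'transformation(s)')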
Example No. 17
0
class Memlet(object):
    """ Data movement object. Represents the data, the subset moved, and the
        manner it is reindexed (`other_subset`) into the destination.
        If there are multiple conflicting writes, this object also specifies
        how they are resolved with a lambda function.
    """

    # Properties
    veclen = Property(dtype=int, desc="Vector length", default=1)
    num_accesses = SymbolicProperty(default=0)
    subset = SubsetProperty(default=subsets.Range([]))
    other_subset = SubsetProperty(allow_none=True)
    data = DataProperty()
    debuginfo = DebugInfoProperty()
    wcr = LambdaProperty(allow_none=True)
    wcr_conflict = Property(dtype=bool, default=True)
    allow_oob = Property(dtype=bool,
                         default=False,
                         desc='Bypass out-of-bounds validation')

    def __init__(self,
                 data,
                 num_accesses,
                 subset,
                 vector_length,
                 wcr=None,
                 other_subset=None,
                 debuginfo=None,
                 wcr_conflict=True):
        """ Constructs a Memlet.
            :param data: The data object or name to access. B{Note:} this
                         parameter will soon be deprecated.
            @type data: Either a string of the data descriptor name or an
                        AccessNode.
            :param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 `dace.dtypes.DYNAMIC` (-1),
                                 designates that the number of accesses is
                                 unknown at compile time.
            :param subset: The subset of `data` that is going to be accessed.
            :param vector_length: The length of a single unit of access to
                                  the data (used for vectorization
                                  optimizations).
            :param wcr: A lambda function specifying how write-conflicts
                        are resolved. The lambda function receives two
                        elements, the `current` value and the `new` value,
                        and returns the value after resolution. For example,
                        summation is `lambda cur, new: cur + new`.
            :param other_subset: The reindexing of `subset` on the other
                                 connected data.
            :param debuginfo: Source-code information (e.g., line, file)
                              used for debugging.
            :param wcr_conflict: If False, forces non-locked conflict
                                 resolution when generating code. The default
                                 is to let the code generator infer this
                                 information from the SDFG.
        """

        # Properties
        self.num_accesses = num_accesses  # type: sympy.expr.Expr
        self.subset = subset  # type: subsets.Subset
        self.veclen = vector_length  # type: int
        if hasattr(data, 'data'):
            data = data.data
        self.data = data  # type: str

        # Annotates memlet with _how_ writing is performed in case of conflict
        self.wcr = wcr
        self.wcr_conflict = wcr_conflict

        # The subset of the other endpoint we are copying from/to (note:
        # carries the dimensionality of the other endpoint too!)
        self.other_subset = other_subset

        self.debuginfo = debuginfo

    def to_json(self, parent_graph=None):
        attrs = dace.serialize.all_properties_to_json(self)

        retdict = {"type": "Memlet", "attributes": attrs}

        return retdict

    @staticmethod
    def from_json(json_obj, context=None):
        if json_obj['type'] != "Memlet":
            raise TypeError("Invalid data type")

        # Create dummy object
        ret = Memlet("", dace.dtypes.DYNAMIC, None, 1)
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)

        return ret

    @staticmethod
    def simple(data,
               subset_str,
               veclen=1,
               wcr_str=None,
               other_subset_str=None,
               wcr_conflict=True,
               num_accesses=None,
               debuginfo=None):
        """ Constructs a Memlet from string-based expressions.
            :param data: The data object or name to access. B{Note:} this
                         parameter will soon be deprecated.
            @type data: Either a string of the data descriptor name or an
                        AccessNode.
            :param subset_str: The subset of `data` that is going to
                               be accessed in string format. Example: '0:N'.
            :param veclen: The length of a single unit of access to
                           the data (used for vectorization optimizations).
            :param wcr_str: A lambda function (as a string) specifying
                            how write-conflicts are resolved. The lambda
                            function receives two elements, the `current`
                            value and the `new` value, and returns the value
                            after resolution. For example, summation is
                            `'lambda cur, new: cur + new'`.
            :param other_subset_str: The reindexing of `subset` on the other
                                     connected data (as a string).
            :param wcr_conflict: If False, forces non-locked conflict
                                 resolution when generating code. The default
                                 is to let the code generator infer this
                                 information from the SDFG.
            :param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 `dace.dtypes.DYNAMIC` (-1),
                                 designates that the number of accesses is
                                 unknown at compile time.
            :param debuginfo: Source-code information (e.g., line, file)
                              used for debugging.

        """
        subset = SubsetProperty.from_string(subset_str)
        if num_accesses is not None:
            na = num_accesses
        else:
            na = subset.num_elements()

        if wcr_str is not None:
            wcr = LambdaProperty.from_string(wcr_str)
        else:
            wcr = None

        if other_subset_str is not None:
            other_subset = SubsetProperty.from_string(other_subset_str)
        else:
            other_subset = None

        # If it is an access node or another memlet
        if hasattr(data, 'data'):
            data = data.data

        return Memlet(data,
                      na,
                      subset,
                      veclen,
                      wcr=wcr,
                      other_subset=other_subset,
                      wcr_conflict=wcr_conflict,
                      debuginfo=debuginfo)

    @staticmethod
    def from_array(dataname, datadesc, wcr=None):
        """ Constructs a Memlet that transfers an entire array's contents.
            :param dataname: The name of the data descriptor in the SDFG.
            :param datadesc: The data descriptor object.
            :param wcr: The conflict resolution lambda.
            @type datadesc: Data.
        """
        rng = subsets.Range.from_array(datadesc)
        return Memlet(dataname, rng.num_elements(), rng, 1, wcr=wcr)

    def __hash__(self):
        return hash((self.data, self.num_accesses, self.subset, self.veclen,
                     str(self.wcr), self.other_subset))

    def __eq__(self, other):
        return all([
            self.data == other.data, self.num_accesses == other.num_accesses,
            self.subset == other.subset, self.veclen == other.veclen,
            self.wcr == other.wcr, self.other_subset == other.other_subset
        ])

    def num_elements(self):
        """ Returns the number of elements in the Memlet subset. """
        return self.subset.num_elements()

    def bounding_box_size(self):
        """ Returns a per-dimension upper bound on the maximum number of
            elements in each dimension.

            This bound will be tight in the case of Range.
        """
        return self.subset.bounding_box_size()

    def validate(self, sdfg, state):
        if self.data is not None and self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    @property
    def free_symbols(self) -> Set[str]:
        """ Returns a set of symbols used in this edge's properties. """
        # Symbolic properties are in num_accesses, and the two subsets
        result = set()
        result |= set(map(str, self.num_accesses.free_symbols))
        if self.subset:
            result |= self.subset.free_symbols
        if self.other_subset:
            result |= self.other_subset.free_symbols
        return result

    def __label__(self, sdfg, state):
        """ Returns a string representation of the memlet for display in a
            graph.

            :param sdfg: The SDFG in which the memlet resides.
            :param state: An SDFGState object in which the memlet resides.
        """
        if self.data is None:
            return self._label(None)
        return self._label(sdfg.arrays[self.data].shape)

    def __str__(self):
        return self._label(None)

    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.num_accesses != num_elements:
            if self.num_accesses == -1:
                result += '(dyn) '
            else:
                result += '(%s) ' % SymbolicProperty.to_string(
                    self.num_accesses)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't mention array if we're accessing a single element and it's zero
                if all(s == 0 for s in self.subset.min_element()):
                    arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == dtypes.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s)' % wcrstr

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result

    def __repr__(self):
        return "Memlet (" + self.__str__() + ")"
Example No. 18
0
class SVEVectorization(transformation.SingleStateTransformation):
    """ Implements the Arm SVE vectorization transform.

        Takes a map entry of a possibly multidimensional map and enforces a
        vectorization on the innermost param for the SVE codegen.
"""

    map_entry = transformation.PatternNode(nodes.MapEntry)

    vec_len = SymbolicProperty(desc="Vector length", default=util.SVE_LEN)

    @classmethod
    def expressions(cls):
        return [sdutil.node_path_graph(cls.map_entry)]

    def can_be_applied(self,
                       state: SDFGState,
                       expr_index,
                       sdfg: SDFG,
                       permissive=False) -> bool:
        map_entry = self.map_entry
        current_map = map_entry.map
        subgraph = state.scope_subgraph(map_entry)
        subgraph_contents = state.scope_subgraph(map_entry,
                                                 include_entry=False,
                                                 include_exit=False)

        # Prevent infinite repeats
        if current_map.schedule == dace.dtypes.ScheduleType.SVE_Map:
            return False

        # Infer all connector types for later checks (without modifying the graph)
        inferred = infer_types.infer_connector_types(sdfg, state, subgraph)

        ########################
        # Ensure only Tasklets and AccessNodes are within the map
        for node, _ in subgraph_contents.all_nodes_recursive():
            if not isinstance(node, (nodes.Tasklet, nodes.AccessNode)):
                return False

        ########################
        # Check for unsupported datatypes on the connectors (including on the Map itself)
        bit_widths = set()
        for node, _ in subgraph.all_nodes_recursive():
            for conn in node.in_connectors:
                t = inferred[(node, conn, True)]
                bit_widths.add(util.get_base_type(t).bytes)
                if t.type not in sve.util.TYPE_TO_SVE:
                    return False
            for conn in node.out_connectors:
                t = inferred[(node, conn, False)]
                bit_widths.add(util.get_base_type(t).bytes)
                if t.type not in sve.util.TYPE_TO_SVE:
                    return False

        # Multiple different bit widths occurring would mess up the predicates
        if len(bit_widths) > 1:
            return False

        ########################
        # Check for unsupported memlets
        param_name = current_map.params[-1]
        for e, _ in subgraph.all_edges_recursive():
            # Check for unsupported strides
            # The only unsupported strides are the ones containing the innermost
            # loop param because they are not constant during a vector step
            param_sym = symbolic.symbol(current_map.params[-1])

            if param_sym in e.data.get_stride(sdfg,
                                              map_entry.map).free_symbols:
                return False

            # Check for unsupported WCR
            if e.data.wcr is not None:
                # Unsupported reduction type
                reduction_type = dace.frontend.operations.detect_reduction_type(
                    e.data.wcr)
                if reduction_type not in sve.util.REDUCTION_TYPE_TO_SVE:
                    return False

                # Param in memlet during WCR is not supported
                if param_name in e.data.subset.free_symbols and e.data.wcr_nonatomic:
                    return False

                # vreduce is not supported
                dst_node = state.memlet_path(e)[-1].dst
                if isinstance(dst_node, nodes.Tasklet):
                    if isinstance(dst_node.in_connectors[e.dst_conn],
                                  dtypes.vector):
                        return False
                elif isinstance(dst_node, nodes.AccessNode):
                    desc = dst_node.desc(sdfg)
                    if isinstance(desc, data.Scalar) and isinstance(
                            desc.dtype, dtypes.vector):
                        return False

        ########################
        # Check for invalid copies in the subgraph
        for node, _ in subgraph.all_nodes_recursive():
            if not isinstance(node, nodes.Tasklet):
                continue

            for e in state.in_edges(node):
                # Check for valid copies from other tasklets and/or streams
                if e.data.data is not None:
                    src_node = state.memlet_path(e)[0].src
                    if not isinstance(src_node,
                                      (nodes.Tasklet, nodes.AccessNode)):
                        # Make sure we only have Code->Code copies and from arrays
                        return False

                    if isinstance(src_node, nodes.AccessNode):
                        src_desc = src_node.desc(sdfg)
                        if isinstance(src_desc, dace.data.Stream):
                            # Stream pops are not implemented
                            return False

        # Run the vector inference algorithm to check if vectorization is feasible
        try:
            vector_inference.infer_vectors(
                sdfg,
                state,
                map_entry,
                self.vec_len,
                flags=vector_inference.VectorInferenceFlags.Allow_Stride,
                apply=False)
        except vector_inference.VectorInferenceException as ex:
            return False

        return True

    def apply(self, state: SDFGState, sdfg: SDFG):
        map_entry = self.map_entry
        current_map = map_entry.map

        # Expand the innermost map if multidimensional
        if len(current_map.params) > 1:
            ext, rem = dace.transformation.helpers.extract_map_dims(
                sdfg, map_entry, list(range(len(current_map.params) - 1)))
            map_entry = rem
            current_map = map_entry.map

        subgraph = state.scope_subgraph(map_entry)

        # Set the schedule
        current_map.schedule = dace.dtypes.ScheduleType.SVE_Map

        # Infer all connector types and apply them
        inferred = infer_types.infer_connector_types(sdfg, state, subgraph)
        infer_types.apply_connector_types(inferred)

        # Infer vector connectors and AccessNodes and apply them
        vector_inference.infer_vectors(
            sdfg,
            state,
            map_entry,
            self.vec_len,
            flags=vector_inference.VectorInferenceFlags.Allow_Stride,
            apply=True)
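
A hedged sketch of applying the SVEVectorization class defined above to a simple elementwise map: it vectorizes the innermost map dimension for the SVE code generator. The program uses standard DaCe explicit-dataflow syntax; only supported scalar types (e.g., float32) appear inside the map so that can_be_applied succeeds.

# Hedged sketch: vectorizing an elementwise map with SVEVectorization (above).
import dace

N = dace.symbol('N')


@dace.program
def scale(x: dace.float32[N], y: dace.float32[N]):
    for i in dace.map[0:N]:
        with dace.tasklet:
            a << x[i]
            b >> y[i]
            b = a * 2.0


sdfg = scale.to_sdfg()
applied = sdfg.apply_transformations(SVEVectorization)  # class defined above
print('SVE-vectorized maps:', applied)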
Example No. 19
0
class GPUTransformLocalStorage(transformation.Transformation):
    """Implements the GPUTransformLocalStorage transformation.

        Similar to GPUTransformMap, but takes multiple maps leading from the
        same data node into account, creating a local storage for each range.

        @see: GPUTransformMap
    """

    _arrays_removed = 0
    _maps_transformed = 0

    fullcopy = Property(desc="Copy whole arrays rather than used subset",
                        dtype=bool,
                        default=False)

    nested_seq = Property(
        desc="Makes nested code semantically-equivalent to single-core code,"
        "transforming nested maps and memory into sequential and "
        "local memory respectively.",
        dtype=bool,
        default=True,
    )

    gpu_id = SymbolicProperty(default=None,
                              allow_none=True,
                              desc="Selects which gpu the map should run on")

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    import dace.libraries.standard as stdlib  # Avoid import loop
    _reduce = stdlib.Reduce("lambda: None", None)

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(GPUTransformLocalStorage._map_entry),
            sdutil.node_path_graph(GPUTransformLocalStorage._reduce),
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        if expr_index == 0:
            map_entry = graph.nodes()[candidate[
                GPUTransformLocalStorage._map_entry]]
            candidate_map = map_entry.map

            # Disallow GPUTransform on nested maps in strict mode
            if strict:
                if graph.entry_node(map_entry) is not None:
                    return False

            # Map schedules that are disallowed to transform to GPUs
            if (candidate_map.schedule == dtypes.ScheduleType.MPI
                    or candidate_map.schedule == dtypes.ScheduleType.GPU_Device
                    or candidate_map.schedule
                    == dtypes.ScheduleType.GPU_ThreadBlock or
                    candidate_map.schedule == dtypes.ScheduleType.Sequential):
                return False

            # Dynamic map ranges cannot become kernels
            if sd.has_dynamic_map_inputs(graph, map_entry):
                return False

            # Recursively check parent for GPU schedules
            sdict = graph.scope_dict()
            current_node = map_entry
            while current_node is not None:
                if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device
                        or current_node.map.schedule
                        == dtypes.ScheduleType.GPU_ThreadBlock):
                    return False
                current_node = sdict[current_node]

            # Ensure that map does not include internal arrays that are
            # allocated on non-default space
            subgraph = graph.scope_subgraph(map_entry)
            for node in subgraph.nodes():
                if (isinstance(node, nodes.AccessNode) and
                        node.desc(sdfg).storage != dtypes.StorageType.Default
                        and
                        node.desc(sdfg).storage != dtypes.StorageType.Register):
                    return False

            # If one of the outputs is a stream, do not match
            map_exit = graph.exit_node(map_entry)
            for edge in graph.out_edges(map_exit):
                dst = graph.memlet_path(edge)[-1].dst
                if (isinstance(dst, nodes.AccessNode)
                        and isinstance(sdfg.arrays[dst.data], data.Stream)):
                    return False

            return True
        elif expr_index == 1:
            reduce = graph.nodes()[candidate[GPUTransformLocalStorage._reduce]]

            # Recursively check parent for GPU schedules
            sdict = graph.scope_dict()
            current_node = sdict[reduce]
            while current_node is not None:
                if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device
                        or current_node.map.schedule
                        == dtypes.ScheduleType.GPU_ThreadBlock):
                    return False
                current_node = sdict[current_node]

            return True

    @staticmethod
    def match_to_str(graph, candidate):
        if GPUTransformLocalStorage._reduce in candidate:
            return str(
                graph.nodes()[candidate[GPUTransformLocalStorage._reduce]])
        else:
            map_entry = graph.nodes()[candidate[
                GPUTransformLocalStorage._map_entry]]
            return str(map_entry)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        gpu_id = self.gpu_id
        if self.expr_index == 0:
            cnode: nodes.MapEntry = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._map_entry]]
            # Change schedule
            cnode.schedule = dtypes.ScheduleType.GPU_Device
            exit_node = graph.exit_node(cnode)
            if gpu_id:
                cnode.location = {'gpu': gpu_id}
                for node in graph.all_nodes_between(cnode, exit_node):
                    if isinstance(node, nodes.CodeNode):
                        node.location = {'gpu': gpu_id}
        else:
            cnode: nodes.LibraryNode = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._reduce]]
            # Change schedule
            cnode.schedule = dtypes.ScheduleType.GPU_Default
            if gpu_id:
                cnode.location = {'gpu': gpu_id}
            exit_node = cnode

        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._maps_transformed += 1
        # If nested graph is designated as sequential, transform schedules and
        # storage from Default to Sequential/Register
        if self.nested_seq and self.expr_index == 0:
            for node in graph.scope_subgraph(cnode).nodes():
                if isinstance(node, nodes.AccessNode):
                    arr = node.desc(sdfg)
                    if arr.storage == dtypes.StorageType.Default:
                        arr.storage = dtypes.StorageType.Register
                elif isinstance(node, nodes.MapEntry):
                    if node.map.schedule == dtypes.ScheduleType.Default:
                        node.map.schedule = dtypes.ScheduleType.Sequential

        gpu_storage_types = [
            dtypes.StorageType.GPU_Global,
            dtypes.StorageType.GPU_Shared,
        ]

        #######################################################
        # Add GPU copies of CPU arrays (i.e., not already on GPU)

        # First, understand which arrays to clone
        all_out_edges = []
        all_out_edges.extend(list(graph.out_edges(exit_node)))
        in_arrays_to_clone = set()
        out_arrays_to_clone = set()
        for e in graph.in_edges(cnode):
            data_node = sd.find_input_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                in_arrays_to_clone.add((data_node, e.data))
        for e in all_out_edges:
            data_node = sd.find_output_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                out_arrays_to_clone.add((data_node, e.data))

        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._arrays_removed += len(
                in_arrays_to_clone) + len(out_arrays_to_clone)

        # Second, create a GPU clone of each array
        # TODO: Overapproximate union of memlets
        cloned_arrays = {}
        in_cloned_arraynodes = {}
        out_cloned_arraynodes = {}
        for array_node, memlet in in_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except:
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except:
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                if gpu_id:
                    sdfg.arrays[cloned_name].location = {'gpu': gpu_id}
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)

            in_cloned_arraynodes[array_node.data] = cloned_node
        for array_node, memlet in out_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except:
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except:
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                if gpu_id:
                    sdfg.arrays[cloned_name].location = {'gpu': gpu_id}
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)
            cloned_node.setzero = True

            out_cloned_arraynodes[array_node.data] = cloned_node

        # Third, connect the cloned arrays to the originals
        for array_name, node in in_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in graph.in_edges(cnode):
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   newmemlet)

                    for e in graph.bfs_edges(edge.dst, reverse=False):
                        parent, _, _child, _, memlet = e
                        if parent != edge.dst and not in_scope(
                                graph, parent, edge.dst):
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[-1].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.ExitNode, forward=True):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   edge.data)
                    graph.remove_edge(edge)

        for array_name, node in out_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in all_out_edges:
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   newmemlet)

                    end_node = graph.entry_node(edge.src)
                    for e in graph.bfs_edges(edge.src, reverse=True):
                        parent, _, _child, _, memlet = e
                        if parent == end_node:
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[0].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.EntryNode, forward=False):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    edge.data.wcr = None
                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   edge.data)
                    graph.remove_edge(edge)

        # Fourth, replace memlet arrays as necessary
        if self.expr_index == 0:
            scope_subgraph = graph.scope_subgraph(cnode)
            for edge in scope_subgraph.edges():
                if edge.data.data is not None and edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]
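
A hedged sketch of using GPUTransformLocalStorage as defined above: it changes the map schedule to GPU_Device and clones host arrays into GPU_Global transients prefixed with 'gpu_'. The option names correspond to the properties declared on the class.

# Hedged sketch: moving a map to the GPU with GPUTransformLocalStorage (above).
import dace

N = dace.symbol('N')


@dace.program
def vadd(x: dace.float64[N], y: dace.float64[N], z: dace.float64[N]):
    z[:] = x + y


sdfg = vadd.to_sdfg()
# fullcopy copies whole arrays instead of only the accessed subsets.
applied = sdfg.apply_transformations(GPUTransformLocalStorage,
                                     options={'fullcopy': True})
print('Maps transformed:', applied)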
Example No. 20
0
class Consume(object):
    """ Consume is a scope, like `Map`, that is a part of the parametric 
        graph extension of the SDFG. It creates a producer-consumer 
        relationship between the input stream and the scope subgraph. The
        subgraph is scheduled to a given number of processing elements
        for processing, and they will try to pop elements from the input
        stream until a given quiescence condition is reached. """

    # Properties
    label = Property(dtype=str, desc="Name of the consume node")
    pe_index = Property(dtype=str, desc="Processing element identifier")
    num_pes = SymbolicProperty(desc="Number of processing elements")
    condition = CodeProperty(desc="Quiescence condition", allow_none=True)
    language = Property(enum=types.Language, default=types.Language.Python)
    schedule = Property(dtype=types.ScheduleType,
                        desc="Consume schedule",
                        enum=types.ScheduleType,
                        from_string=lambda x: types.ScheduleType[x])
    chunksize = Property(dtype=int,
                         desc="Maximal size of elements to consume at a time",
                         default=1)
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    def as_map(self):
        """ Compatibility function that allows to view the consume as a map,
            mainly in memlet propagation. """
        return Map(self.label, [self.pe_index],
                   sbs.Range([(0, self.num_pes - 1, 1)]), self.schedule)

    def __init__(self,
                 label,
                 pe_tuple,
                 condition,
                 schedule=types.ScheduleType.Default,
                 chunksize=1,
                 debuginfo=None):
        super(Consume, self).__init__()

        # Properties
        self.label = label
        self.pe_index, self.num_pes = pe_tuple
        self.condition = condition
        self.schedule = schedule
        self.chunksize = chunksize
        self.debuginfo = debuginfo

    def __str__(self):
        if self.condition is not None:
            return ("%s [%s=0:%s], Condition: %s" %
                    (self._label, self.pe_index, self.num_pes,
                     CodeProperty.to_string(self.condition)))
        else:
            return ("%s [%s=0:%s]" %
                    (self._label, self.pe_index, self.num_pes))

    def validate(self, sdfg, state, node):
        if not data.validate_name(self.label):
            raise NameError('Invalid consume name "%s"' % self.label)

    def get_param_num(self):
        """ Returns the number of consume dimension parameters/symbols. """
        return 1
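
A minimal, hedged sketch that instantiates the Consume scope above and inspects its map view. Passing condition=None models a consume scope without an explicit quiescence condition; four processing elements pop elements from the stream in chunks of eight.

# Hedged sketch: creating a Consume scope over four processing elements.
cons = Consume('consume_S', ('p', 4), condition=None, chunksize=8)
print(cons)                # e.g. "consume_S [p=0:4]"
pe_map = cons.as_map()     # Map view used during memlet propagation
print(pe_map.params, pe_map.range)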
Example No. 21
0
class Array(Data):
    """
    Array data descriptor. This object represents a multi-dimensional data container in SDFGs that can be accessed and
    modified. The definition does not contain the actual array, but rather a description of how to construct it and
    how it should behave.

    The array definition is flexible in terms of data allocation: it allows arbitrary multidimensional, potentially
    symbolic shapes (e.g., an array with size ``N+1 x M`` will have ``shape=(N+1, M)``) and arbitrary data
    typeclasses (``dtype``). The physical data layout of the array is controlled by several properties:
       * The ``strides`` property determines the ordering and layout of the dimensions --- it specifies how many
         elements in memory are skipped whenever one element in that dimension is advanced. For example, the contiguous
         dimension always has a stride of ``1``; a C-style MxN array will have strides ``(N, 1)``, whereas a 
         FORTRAN-style array of the same size will have ``(1, M)``. Strides can be larger than the shape, which allows
         post-padding of the contents of each dimension.
       * The ``start_offset`` property is a number of elements to pad the beginning of the memory buffer with. This is
         used to ensure that a specific index is aligned as a form of pre-padding (that element may not necessarily be
         the first element, e.g., in the case of halo or "ghost cells" in stencils).
       * The ``total_size`` property determines how large the total allocation size is. Normally, it is the product of
         the ``shape`` elements, but if pre- or post-padding is involved it may be larger.
       * ``alignment`` provides alignment guarantees (in bytes) of the first element in the allocated array. This is
         used by allocators in the code generator to ensure certain addresses are expected to be aligned, e.g., for
         vectorization.
       * Lastly, a property called ``offset`` controls the logical access of the array, i.e., what would be the first
         element's index after padding and alignment. This mimics a language feature prominent in scientific languages
         such as FORTRAN, where one could set an array to begin with 1, or any arbitrary index. By default this is set
         to zero.

    To summarize with an example, a two-dimensional array with pre- and post-padding looks as follows:
    
    .. code-block:: text

        [xxx][          |xx]
             [          |xx]
             [          |xx]
             [          |xx]
             ---------------
             [xxxxxxxxxxxxx]

        shape = (4, 10)
        strides = (12, 1)
        start_offset = 3
        total_size = 63   [= 3 + 12 * 5]
        offset = (0, 0)


    Notice that the last padded row does not appear in strides, but is a consequence of ``total_size`` being larger.
    

    Apart from memory layout, other properties of ``Array`` help the data-centric transformation infrastructure make
    decisions about the array. ``allow_conflicts`` states that warnings should not be printed if potentially conflicting
    accesses (e.g., data races) occur. ``may_alias`` inhibits transformations that would assume that this array does not
    overlap with other arrays in the same context (e.g., function).
    """

    # Properties
    allow_conflicts = Property(dtype=bool,
                               default=False,
                               desc='If enabled, allows more than one '
                               'memlet to write to the same memory location without conflict '
                               'resolution.')

    strides = ShapeProperty(
        # element_type=symbolic.pystr_to_symbolic,
        desc='For each dimension, the number of elements to '
        'skip in order to obtain the next element in '
        'that dimension.')

    total_size = SymbolicProperty(default=0, desc='The total allocated size of the array. Can be used for padding.')

    offset = ShapeProperty(desc='Initial offset to translate all indices by.')

    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in the same function')

    alignment = Property(dtype=int, default=0, desc='Allocation alignment in bytes (0 uses compiler-default)')

    start_offset = Property(dtype=int, default=0, desc='Allocation offset elements for manual alignment (pre-padding)')

    def __init__(self,
                 dtype,
                 shape,
                 transient=False,
                 allow_conflicts=False,
                 storage=dtypes.StorageType.Default,
                 location=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 alignment=0,
                 debuginfo=None,
                 total_size=None,
                 start_offset=None):

        super(Array, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.may_alias = may_alias
        self.alignment = alignment
        if start_offset is not None:
            self.start_offset = start_offset

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = [_prod(shape[i + 1:]) for i in range(len(shape))]

        if strides is not None and shape is not None and total_size is None:
            # Compute the minimal total_size that could be used with strides and shape
            self.total_size = sum(((shp - 1) * s for shp, s in zip(shape, strides))) + 1
        else:
            self.total_size = total_size or _prod(shape)

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return '%s (dtype=%s, shape=%s)' % (type(self).__name__, self.dtype, self.shape)

    def clone(self):
        return type(self)(self.dtype, self.shape, self.transient, self.allow_conflicts, self.storage, self.location,
                          self.strides, self.offset, self.may_alias, self.lifetime, self.alignment, self.debuginfo,
                          self.total_size, self.start_offset)

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @classmethod
    def from_json(cls, json_obj, context=None):
        # Create dummy object
        ret = cls(dtypes.int8, ())
        serialize.set_properties_from_json(ret, json_obj, context=context)

        # Default shape-related properties
        if not ret.offset:
            ret.offset = [0] * len(ret.shape)
        if not ret.strides:
            # Default strides are C-ordered
            ret.strides = [_prod(ret.shape[i + 1:]) for i in range(len(ret.shape))]
        if ret.total_size == 0:
            ret.total_size = _prod(ret.shape)

        # Check validity now
        ret.validate()
        return ret

    def validate(self):
        super(Array, self).validate()
        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol, symbolic.sympy.Basic)) for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer ' 'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sp.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sp.Symbol(str(s), **s.assumptions0)
                else:
                    s = sp.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sp.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sp.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sp.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Array):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # Any other case (constant vs. constant), check for equality
            if otherdim != dim:
                return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        arrname = name

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [d.name if isinstance(d, symbolic.symbol) else str(d) for d in self.shape]

    @property
    def free_symbols(self):
        result = super().free_symbols
        for s in self.strides:
            if isinstance(s, sp.Expr):
                result |= set(s.free_symbols)
        if isinstance(self.total_size, sp.Expr):
            result |= set(self.total_size.free_symbols)
        for o in self.offset:
            if isinstance(o, sp.Expr):
                result |= set(o.free_symbols)

        return result
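
A short, hedged sketch of constructing the Array descriptor defined above, both directly and through an SDFG. The strides describe a column-major (FORTRAN-style) layout; total_size is derived automatically as the minimal allocation that covers the given shape and strides.

# Hedged sketch: building Array descriptors like the class defined above.
import dace
from dace import dtypes

# Column-major 4x10 array: advancing dim 0 skips 1 element, dim 1 skips 4.
desc = Array(dtypes.float64, (4, 10), strides=(1, 4))
print(desc)                            # Array (dtype=..., shape=(4, 10))
print(desc.strides, desc.total_size)   # minimal total_size: (3*1 + 9*4) + 1 = 40

# Descriptors are usually registered on an SDFG rather than built by hand.
sdfg = dace.SDFG('layout_example')
sdfg.add_array('A', (4, 10), dace.float64, strides=(1, 4))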