Example 1
class Transformation(object):
    """ Base class for transformations, as well as a static registry of
        transformations, where new transformations can be added in a
        decentralized manner.

        New transformations are registered with ``Transformation.register``
        (or ``dace.registry.autoregister_params``) with two optional boolean
        keyword arguments: ``singlestate`` (default: False) and ``strict``
        (default: False).
        If ``singlestate`` is True, the transformation is matched on subgraphs
        inside an SDFGState; otherwise, subgraphs of the SDFG state machine are
        matched.
        If ``strict`` is True, this transformation will be considered strict
        (i.e., always beneficial to perform) and will be performed automatically
        as part of SDFG strict transformations.
    """

    # Properties
    sdfg_id = Property(dtype=int, category="(Debug)")
    state_id = Property(dtype=int, category="(Debug)")
    _subgraph = DictProperty(key_type=int, value_type=int, category="(Debug)")
    expr_index = Property(dtype=int, category="(Debug)")

    @staticmethod
    def annotates_memlets():
        """ Indicates whether the transformation annotates the edges it creates
            or modifies with the appropriate memlets. This determines
            whether to apply memlet propagation after the transformation.
        """

        return False

    @staticmethod
    def expressions():
        """ Returns a list of Graph objects that will be matched in the
            subgraph isomorphism phase. Used as a pre-pass before calling
            `can_be_applied`.
            :see: Transformation.can_be_applied
        """

        raise NotImplementedError

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if this transformation can be applied on the candidate
            matched subgraph.
            :param graph: SDFGState object if this Transformation is
                          single-state, or SDFG object otherwise.
            :param candidate: A mapping between nodes in the matched pattern
                              (from `Transformation.expressions`) and their
                              node IDs in `graph`.
            :param expr_index: The list index from `Transformation.expressions`
                               that was matched.
            :param sdfg: If `graph` is an SDFGState, its parent SDFG. Otherwise
                         should be equal to `graph`.
            :param strict: Whether the transformation should run in strict
                           mode.
            :return: True if the transformation can be applied.
        """
        raise NotImplementedError

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a string representation of the pattern match on the
            candidate subgraph. Used when identifying matches in the console
            UI.
        """
        raise NotImplementedError

    def __init__(self, sdfg_id, state_id, subgraph, expr_index):
        """ Initializes an instance of Transformation.
            :param sdfg_id: A unique ID of the SDFG.
            :param state_id: The node ID of the SDFG state, if applicable.
            :param subgraph: A mapping between nodes in the matched pattern
                             (from `Transformation.expressions`) and their
                             node IDs in the graph.
            :param expr_index: The list index from `Transformation.expressions`
                               that was matched.
            :raise TypeError: When the transformation is not a subclass of
                              Transformation.
            :raise TypeError: When state_id is not an instance of int.
            :raise TypeError: When subgraph is not a dict of
                              dace.sdfg.nodes.Node : int.
        """

        self.sdfg_id = sdfg_id
        self.state_id = state_id
        for value in subgraph.values():
            if not isinstance(value, int):
                raise TypeError('All values of the subgraph dictionary must '
                                'be instances of int.')
        # Serializable subgraph with node IDs as keys
        expr = self.expressions()[expr_index]
        self._subgraph = {expr.node_id(k): v for k, v in subgraph.items()}
        self._subgraph_user = subgraph
        self.expr_index = expr_index

    @property
    def subgraph(self):
        return self._subgraph_user

    def __lt__(self, other):
        """ Comparing two transformations by their class name and node IDs
            in match. Used for ordering transformations consistently.
        """
        if type(self) != type(other):
            return type(self).__name__ < type(other).__name__

        self_ids = iter(self.subgraph.values())
        other_ids = iter(other.subgraph.values())

        try:
            self_id = next(self_ids)
        except StopIteration:
            return True
        try:
            other_id = next(other_ids)
        except StopIteration:
            return False

        self_end = False

        while self_id is not None and other_id is not None:
            if self_id != other_id:
                return self_id < other_id
            try:
                self_id = next(self_ids)
            except StopIteration:
                self_end = True
            try:
                other_id = next(other_ids)
            except StopIteration:
                # Either both iterators ended (the matches are equal) or
                # other is a proper prefix of self; in both cases, self is
                # not less than other
                return False
            if self_end:  # self is a proper prefix of other
                return True

    def apply_pattern(self, sdfg):
        """ Applies this transformation on the given SDFG. """
        self.apply(sdfg)
        if not self.annotates_memlets():
            propagation.propagate_memlets_sdfg(sdfg)

    def __str__(self):
        return type(self).__name__

    def modifies_graph(self):
        return True

    def print_match(self, sdfg):
        """ Returns a string representation of the pattern match on the
            given SDFG. Used for printing matches in the console UI.
        """
        if not isinstance(sdfg, dace.SDFG):
            raise TypeError("Expected SDFG, got: {}".format(
                type(sdfg).__name__))
        if self.state_id == -1:
            graph = sdfg
        else:
            graph = sdfg.nodes()[self.state_id]
        string = type(self).__name__ + ' in '
        string += type(self).match_to_str(graph, self.subgraph)
        return string

    def to_json(self, parent=None):
        props = dace.serialize.all_properties_to_json(self)
        return {
            'type': 'Transformation',
            'transformation': type(self).__name__,
            **props
        }

    @staticmethod
    def from_json(json_obj, context=None):
        xform = next(ext for ext in Transformation.extensions().keys()
                     if ext.__name__ == json_obj['transformation'])

        # Recreate subgraph
        expr = xform.expressions()[json_obj['expr_index']]
        subgraph = {
            expr.node(int(k)): int(v)
            for k, v in json_obj['_subgraph'].items()
        }

        # Reconstruct transformation
        ret = xform(json_obj['sdfg_id'], json_obj['state_id'], subgraph,
                    json_obj['expr_index'])
        context = context or {}
        context['transformation'] = ret
        dace.serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'transformation', 'type'})
        return ret
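The registration mechanism described in the class docstring above might be used roughly as follows. This is a minimal sketch, assuming the `dace.registry.autoregister_params` decorator and the `dace.sdfg.utils.node_path_graph` pattern helper; the class name and one-node pattern are hypothetical:

import dace
from dace.registry import autoregister_params
from dace.sdfg import nodes, utils as sdutil


@autoregister_params(singlestate=True, strict=False)
class HypotheticalTransformation(Transformation):
    # A one-node pattern: any access node in a state
    _access = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        return [sdutil.node_path_graph(HypotheticalTransformation._access)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True  # Toy example: always applicable

    def apply(self, sdfg):
        pass  # Graph rewriting logic would go here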
Example 2
class AccessNode(Node):
    """ A node that accesses data in the SDFG. Denoted by a circular shape. """

    access = Property(choices=dtypes.AccessType,
                      desc="Type of access to this array",
                      default=dtypes.AccessType.ReadWrite)
    setzero = Property(dtype=bool, desc="Initialize to zero", default=False)
    debuginfo = DebugInfoProperty()
    data = DataProperty(desc="Data (array, stream, scalar) to access")

    def __init__(self,
                 data,
                 access=dtypes.AccessType.ReadWrite,
                 debuginfo=None):
        super(AccessNode, self).__init__()

        # Properties
        self.debuginfo = debuginfo
        self.access = access
        if not isinstance(data, str):
            raise TypeError('Data for AccessNode must be a string')
        self.data = data

    @staticmethod
    def from_json(json_obj, context=None):
        ret = AccessNode("Nodata")
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        return ret

    def __deepcopy__(self, memo):
        node = object.__new__(AccessNode)
        node._access = self._access
        node._data = self._data
        node._setzero = self._setzero
        node._in_connectors = dcpy(self._in_connectors, memo=memo)
        node._out_connectors = dcpy(self._out_connectors, memo=memo)
        node._debuginfo = dcpy(self._debuginfo, memo=memo)
        return node

    @property
    def label(self):
        return self.data

    def __label__(self, sdfg, state):
        return self.data

    def desc(self, sdfg):
        from dace.sdfg import SDFGState, ScopeSubgraphView
        if isinstance(sdfg, (SDFGState, ScopeSubgraphView)):
            sdfg = sdfg.parent
        return sdfg.arrays[self.data]

    def validate(self, sdfg, state):
        if self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    def has_writes(self, state):
        for e in state.in_edges(self):
            if not e.data.is_empty():
                return True
        return False

    def has_reads(self, state):
        for e in state.out_edges(self):
            if not e.data.is_empty():
                return True
        return False
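As a usage sketch (assuming the standard SDFG builder API; the names are illustrative), an AccessNode is typically created through `SDFGState.add_access` and resolved back to its data descriptor with `desc`:

import dace

sdfg = dace.SDFG('access_example')
sdfg.add_array('A', [10], dace.float64)
state = sdfg.add_state()

a = state.add_access('A')   # Creates and adds an AccessNode for 'A'
print(a.label)              # 'A' -- the label is the data name
print(a.desc(sdfg).shape)   # (10,) -- desc() resolves the data descriptor
print(a.has_reads(state))   # False -- no outgoing edges yet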
Example 3
class NestedSDFG(CodeNode):
    """ An SDFG state node that contains an SDFG of its own, runnable using
        the data dependencies specified using its connectors.

        It is encouraged to use nested SDFGs instead of coarse-grained tasklets
        since they are analyzable with respect to transformations.

        @note: A nested SDFG cannot create recursion (one of its parent SDFGs).
    """

    # NOTE: We cannot use SDFG as the type because of an import loop
    sdfg = SDFGReferenceProperty(desc="The SDFG", allow_none=True)
    schedule = Property(dtype=dtypes.ScheduleType,
                        desc="SDFG schedule",
                        allow_none=True,
                        choices=dtypes.ScheduleType,
                        from_string=lambda x: dtypes.ScheduleType[x],
                        default=dtypes.ScheduleType.Default)
    symbol_mapping = DictProperty(
        key_type=str,
        value_type=dace.symbolic.pystr_to_symbolic,
        desc="Mapping between internal symbols and their values, expressed as "
        "symbolic expressions")
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    instrument = Property(
        choices=dtypes.InstrumentationType,
        desc="Measure execution statistics with given method",
        default=dtypes.InstrumentationType.No_Instrumentation)

    no_inline = Property(
        dtype=bool,
        desc="If True, this nested SDFG will not be inlined in strict mode "
        "(in the InlineSDFG transformation)",
        default=False)

    def __init__(self,
                 label,
                 sdfg,
                 inputs: Set[str],
                 outputs: Set[str],
                 symbol_mapping: Dict[str, Any] = None,
                 schedule=dtypes.ScheduleType.Default,
                 location=None,
                 debuginfo=None):
        from dace.sdfg import SDFG
        super(NestedSDFG, self).__init__(label, location, inputs, outputs)

        # Properties
        self.sdfg: SDFG = sdfg
        self.symbol_mapping = symbol_mapping or {}
        self.schedule = schedule
        self.debuginfo = debuginfo

    @staticmethod
    def from_json(json_obj, context=None):
        from dace import SDFG  # Avoid import loop

        # We have to load the SDFG first.
        ret = NestedSDFG("nolabel", SDFG('nosdfg'), {}, {})

        dace.serialize.set_properties_from_json(ret, json_obj, context)

        if context and 'sdfg_state' in context:
            ret.sdfg.parent = context['sdfg_state']
        if context and 'sdfg' in context:
            ret.sdfg.parent_sdfg = context['sdfg']

        ret.sdfg.parent_nsdfg_node = ret

        ret.sdfg.update_sdfg_list([])

        return ret

    @property
    def free_symbols(self) -> Set[str]:
        return set().union(
            *(map(str,
                  pystr_to_symbolic(v).free_symbols)
              for v in self.symbol_mapping.values()),
            *(map(str,
                  pystr_to_symbolic(v).free_symbols)
              for v in self.location.values()))

    def infer_connector_types(self, sdfg, state):
        # Avoid import loop
        from dace.sdfg.infer_types import infer_connector_types
        # Infer internal connector types
        infer_connector_types(self.sdfg)

    def __str__(self):
        if not self.label:
            return "SDFG"
        else:
            return self.label

    def validate(self, sdfg, state):
        if not dtypes.validate_name(self.label):
            raise NameError('Invalid nested SDFG name "%s"' % self.label)
        for in_conn in self.in_connectors:
            if not dtypes.validate_name(in_conn):
                raise NameError('Invalid input connector "%s"' % in_conn)
        for out_conn in self.out_connectors:
            if not dtypes.validate_name(out_conn):
                raise NameError('Invalid output connector "%s"' % out_conn)
        connectors = self.in_connectors.keys() | self.out_connectors.keys()
        for dname, desc in self.sdfg.arrays.items():
            # TODO(later): Disallow scalars without access nodes (so that this
            #              check passes for them too).
            if isinstance(desc, data.Scalar):
                continue
            if not desc.transient and dname not in connectors:
                raise NameError('Data descriptor "%s" not found in nested '
                                'SDFG connectors' % dname)
            if dname in connectors and desc.transient:
                raise NameError(
                    '"%s" is a connector but its corresponding array is transient'
                    % dname)

        # Validate undefined symbols
        symbols = set(k for k in self.sdfg.free_symbols if k not in connectors)
        missing_symbols = [s for s in symbols if s not in self.symbol_mapping]
        if missing_symbols:
            raise ValueError('Missing symbols on nested SDFG: %s' %
                             (missing_symbols))
        extra_symbols = self.symbol_mapping.keys() - symbols
        if len(extra_symbols) > 0:
            # TODO: Elevate to an error?
            warnings.warn(
                f"{self.label} maps to unused symbol(s): {extra_symbols}")

        # Recursively validate nested SDFG
        self.sdfg.validate()
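A usage sketch, assuming the `SDFGState.add_nested_sdfg` convenience API; the SDFG and array names are illustrative. Note that the non-transient inner array `x` must appear as a connector, matching the validation rules above:

import dace

inner = dace.SDFG('inner')
inner.add_array('x', [20], dace.float64)
inner.add_state()

outer = dace.SDFG('outer')
outer.add_array('A', [20], dace.float64)
state = outer.add_state()

# 'x' becomes an input connector of the resulting NestedSDFG node
nsdfg = state.add_nested_sdfg(inner, outer, {'x'}, set())
state.add_edge(state.add_access('A'), None, nsdfg, 'x',
               dace.Memlet('A[0:20]'))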
Example 4
class SubgraphTransformation(TransformationBase):
    """
    Base class for transformations that apply on arbitrary subgraphs, rather
    than matching a specific pattern.

    Subclasses need to implement the `can_be_applied` and `apply` operations,
    as well as be registered with the subclass registry. See the
    `Transformation` class docstring for more information.
    """

    sdfg_id = Property(dtype=int, desc='ID of SDFG to transform')
    state_id = Property(
        dtype=int,
        desc='ID of state to transform subgraph within, or -1 to transform the '
        'SDFG')
    subgraph = SetProperty(element_type=int,
                           desc='Subgraph in transformation instance')

    def __init__(self,
                 subgraph: Union[Set[int], gr.SubgraphView],
                 sdfg_id: int = None,
                 state_id: int = None):
        if (not isinstance(subgraph, (gr.SubgraphView, SDFG, SDFGState))
                and (sdfg_id is None or state_id is None)):
            raise TypeError(
                'Subgraph transformation either expects a SubgraphView or a '
                'set of node IDs, SDFG ID and state ID (or -1).')

        # An entire graph is given as a subgraph
        if isinstance(subgraph, (SDFG, SDFGState)):
            subgraph = gr.SubgraphView(subgraph, subgraph.nodes())

        if isinstance(subgraph, gr.SubgraphView):
            self.subgraph = set(
                subgraph.graph.node_id(n) for n in subgraph.nodes())

            if isinstance(subgraph.graph, SDFGState):
                sdfg = subgraph.graph.parent
                self.sdfg_id = sdfg.sdfg_id
                self.state_id = sdfg.node_id(subgraph.graph)
            elif isinstance(subgraph.graph, SDFG):
                self.sdfg_id = subgraph.graph.sdfg_id
                self.state_id = -1
            else:
                raise TypeError('Unrecognized graph type "%s"' %
                                type(subgraph.graph).__name__)
        else:
            self.subgraph = subgraph
            self.sdfg_id = sdfg_id
            self.state_id = state_id

    def subgraph_view(self, sdfg: SDFG) -> gr.SubgraphView:
        graph = sdfg.sdfg_list[self.sdfg_id]
        if self.state_id != -1:
            graph = graph.node(self.state_id)
        return gr.SubgraphView(graph,
                               [graph.node(idx) for idx in self.subgraph])

    def can_be_applied(self, sdfg: SDFG, subgraph: gr.SubgraphView) -> bool:
        """
        Tries to match the transformation on a given subgraph, returning
        True if this transformation can be applied.
        :param sdfg: The SDFG that includes the subgraph.
        :param subgraph: The SDFG or state subgraph to try to apply the
                         transformation on.
        :return: True if the subgraph can be transformed, or False otherwise.
        """
        pass

    def apply(self, sdfg: SDFG):
        """
        Applies the transformation on the given subgraph.
        :param sdfg: The SDFG that includes the subgraph.
        """
        pass

    @classmethod
    def apply_to(cls,
                 sdfg: SDFG,
                 *where: Union[nd.Node, SDFGState, gr.SubgraphView],
                 verify: bool = True,
                 **options: Any):
        """
        Applies this transformation to a given subgraph, defined by a set of
        nodes. Raises an error if arguments are invalid or the transformation
        is not applicable.

        To apply the transformation on a specific subgraph, the `where`
        parameter can be used either with a subgraph object (`SubgraphView`)
        or directly with a list of subgraph nodes, given as `Node` or
        `SDFGState` objects. Transformation properties can then be given as
        keyword arguments. For example, applying `SubgraphFusion` on a
        subgraph of three nodes can be done in one of two ways:
        ```
        # Subgraph
        SubgraphFusion.apply_to(
            sdfg, SubgraphView(state, [node_a, node_b, node_c]))

        # Simplified API: list of nodes
        SubgraphFusion.apply_to(sdfg, node_a, node_b, node_c)
        ```

        :param sdfg: The SDFG to apply the transformation to.
        :param where: A set of nodes in the SDFG/state, or a subgraph thereof.
        :param verify: Check that `can_be_applied` returns True before applying.
        :param options: A set of parameters to use for applying the
                        transformation.
        """
        subgraph = None
        if len(where) == 1:
            if isinstance(where[0], (list, tuple)):
                where = where[0]
            elif isinstance(where[0], gr.SubgraphView):
                subgraph = where[0]
        if len(where) == 0:
            raise ValueError('At least one node is required')

        # Check that all keyword arguments are nodes and if interstate or not
        if subgraph is None:
            sample_node = where[0]

            if isinstance(sample_node, SDFGState):
                graph = sdfg
                state_id = -1
            elif isinstance(sample_node, nd.Node):
                graph = next(s for s in sdfg.nodes()
                             if sample_node in s.nodes())
                state_id = sdfg.node_id(graph)
            else:
                raise TypeError('Invalid node type "%s"' %
                                type(sample_node).__name__)

            # Construct subgraph and instantiate transformation
            subgraph = gr.SubgraphView(graph, where)
            instance = cls(subgraph, sdfg.sdfg_id, state_id)
        else:
            # Construct instance from subgraph directly
            instance = cls(subgraph)

        # Construct transformation parameters
        for optname, optval in options.items():
            if optname not in cls.__properties__:
                raise ValueError('Property "%s" not found in transformation' %
                                 optname)
            setattr(instance, optname, optval)

        if verify:
            if not instance.can_be_applied(sdfg, subgraph):
                raise ValueError('Transformation cannot be applied on the '
                                 'given subgraph ("can_be_applied" failed)')

        # Apply to SDFG
        return instance.apply(sdfg)

    def to_json(self, parent=None):
        props = serialize.all_properties_to_json(self)
        return {
            'type': 'SubgraphTransformation',
            'transformation': type(self).__name__,
            **props
        }

    @staticmethod
    def from_json(json_obj: Dict[str, Any],
                  context: Dict[str, Any] = None) -> 'SubgraphTransformation':
        xform = next(ext for ext in SubgraphTransformation.extensions().keys()
                     if ext.__name__ == json_obj['transformation'])

        # Reconstruct transformation
        ret = xform(json_obj['subgraph'], json_obj['sdfg_id'],
                    json_obj['state_id'])
        context = context or {}
        context['transformation'] = ret
        serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'transformation', 'type'})
        return ret
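A hypothetical subclass sketch, complementing the `apply_to` example in the docstring above; the registration decorator usage and the trivial applicability check are placeholders, not the prescribed pattern:

import dace


@dace.registry.autoregister
class HypotheticalSubgraphXform(SubgraphTransformation):
    def can_be_applied(self, sdfg, subgraph):
        # Toy check: only apply to non-empty subgraphs
        return len(subgraph.nodes()) > 0

    def apply(self, sdfg):
        subgraph = self.subgraph_view(sdfg)
        # Rewriting of the nodes in `subgraph` would go here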
Example 5
class Pipeline(Map):
    """ This a convenience-subclass of Map that allows easier implementation of
        loop nests (using regular Map indices) that need a constant-sized
        initialization and drain phase (e.g., N*M + c iterations), which would
        otherwise need a flattened one-dimensional map.
    """
    init_size = SymbolicProperty(default=0,
                                 desc="Number of initialization iterations.")
    init_overlap = Property(
        dtype=bool,
        default=True,
        desc="Whether to increment regular map indices during initialization.")
    drain_size = SymbolicProperty(default=1,
                                  desc="Number of drain iterations.")
    drain_overlap = Property(
        dtype=bool,
        default=True,
        desc="Whether to increment regular map indices during pipeline drain.")
    additional_iterators = Property(
        dtype=dict,
        desc="Additional iterators, managed by the user inside the scope.")

    def __init__(self,
                 *args,
                 init_size=0,
                 init_overlap=False,
                 drain_size=0,
                 drain_overlap=False,
                 additional_iterators=None,
                 **kwargs):
        super(Pipeline, self).__init__(*args, **kwargs)
        self.init_size = init_size
        self.init_overlap = init_overlap
        self.drain_size = drain_size
        self.drain_overlap = drain_overlap
        # Avoid sharing a mutable default argument across instances
        self.additional_iterators = additional_iterators or {}

    def iterator_str(self):
        return "__" + "".join(self.params)

    def loop_bound_str(self):
        from dace.codegen.targets.common import sym2cpp
        bound = 1
        for begin, end, step in self.range:
            bound *= (step + end - begin) // step
        # Add init and drain phases when relevant
        add_str = (" + " + sym2cpp(self.init_size)
                   if self.init_size != 0 and not self.init_overlap else "")
        add_str += (" + " + sym2cpp(self.drain_size)
                    if self.drain_size != 0 and not self.drain_overlap else "")
        return sym2cpp(bound) + add_str

    def init_condition(self):
        """Variable that can be checked to see if pipeline is currently in
           initialization phase."""
        if self.init_size == 0:
            raise ValueError("No init condition exists for " + self.label)
        return self.iterator_str() + "_init"

    def drain_condition(self):
        """Variable that can be checked to see if pipeline is currently in
           draining phase."""
        if self.drain_size == 0:
            raise ValueError("No drain condition exists for " + self.label)
        return self.iterator_str() + "_drain"
Example 6
class Scalar(Data):
    """ Data descriptor of a scalar value. """

    allow_conflicts = Property(dtype=bool)

    def __init__(self,
                 dtype,
                 transient=False,
                 storage=dace.types.StorageType.Default,
                 allow_conflicts=False,
                 location='',
                 toplevel=False,
                 debuginfo=None):
        self.allow_conflicts = allow_conflicts
        shape = [1]
        super(Scalar, self).__init__(dtype, shape, transient, storage,
                                     location, toplevel, debuginfo)

    def __repr__(self):
        return 'Scalar (dtype=%s)' % self.dtype

    def clone(self):
        return Scalar(self.dtype, self.transient, self.storage,
                      self.allow_conflicts, self.location, self.toplevel,
                      self.debuginfo)

    @property
    def strides(self):
        return self.shape

    @property
    def offset(self):
        return [0]

    def is_equivalent(self, other):
        if not isinstance(other, Scalar):
            return False
        if self.dtype != other.dtype:
            return False
        return True

    def signature(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call:
            return name
        return str(self.dtype.ctype) + ' ' + name

    def sizes(self):
        return None

    def covers_range(self, rng):
        if len(rng) != 1:
            return False

        rng = rng[0]

        try:
            if (rng[1] - rng[0]) > rng[2]:
                return False
        except TypeError:
            # Cannot determine the truth value of a symbolic relational
            # expression; assume the range is covered
            pass

        return True
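A short sketch of how `signature` renders a scalar argument, following the method above and assuming `dace.float64.ctype` is 'double':

import dace

s = Scalar(dace.float64)
print(s.signature(name='alpha'))                 # 'double alpha'
print(s.signature(for_call=True, name='alpha'))  # 'alpha'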
Example 7
class Memlet(object):
    """ Data movement object. Represents the data, the subset moved, and the
        manner in which it is reindexed (`other_subset`) into the destination.
        If there are multiple conflicting writes, this object also specifies
        how they are resolved with a lambda function.
    """

    # Properties
    volume = SymbolicProperty(default=0,
                              desc='The exact number of elements moved '
                              'using this memlet, or the maximum number '
                              'if dynamic=True (with 0 as unbounded)')
    dynamic = Property(default=False,
                       desc='Is the number of elements moved determined at '
                       'runtime (e.g., data dependent)')
    subset = SubsetProperty(allow_none=True,
                            desc='Subset of elements to move from the data '
                            'attached to this edge.')
    other_subset = SubsetProperty(
        allow_none=True,
        desc='Subset of elements after reindexing to the data not attached '
        'to this edge (e.g., for offsets and reshaping).')
    data = DataProperty(desc='Data descriptor attached to this memlet')
    wcr = LambdaProperty(allow_none=True,
                         desc='If set, defines a write-conflict resolution '
                         'lambda function. The syntax of the lambda function '
                         'receives two elements: `current` value and `new` '
                         'value, and returns the value after resolution')

    # Code generation and validation hints
    debuginfo = DebugInfoProperty(desc='Line information to track source and '
                                  'generated code')
    wcr_nonatomic = Property(dtype=bool,
                             default=False,
                             desc='If True, always generates non-conflicting '
                             '(non-atomic) writes in resulting code')
    allow_oob = Property(dtype=bool,
                         default=False,
                         desc='Bypass out-of-bounds validation')

    def __init__(self,
                 expr: str = None,
                 data: str = None,
                 subset: Union[str, subsets.Subset] = None,
                 other_subset: Union[str, subsets.Subset] = None,
                 volume: Union[int, str, symbolic.SymbolicType] = None,
                 dynamic: bool = False,
                 wcr: Union[str, ast.AST] = None,
                 debuginfo: dtypes.DebugInfo = None,
                 wcr_nonatomic: bool = False,
                 allow_oob: bool = False):
        """ 
        Constructs a Memlet.
        :param expr: A string expression of this memlet, provided as an
                     ease-of-use API. Must follow one of the following forms:
                     1. ``ARRAY``,
                     2. ``ARRAY[SUBSET]``,
                     3. ``ARRAY[SUBSET] -> OTHER_SUBSET``.
        :param data: (DEPRECATED) Data descriptor name attached to this memlet.
        :param subset: The subset to take from the data attached to the edge,
                       represented either as a string or a Subset object.
        :param other_subset: The subset to offset into the other side of the
                             memlet, represented either as a string or a Subset 
                             object.
        :param volume: The exact number of elements moved using this
                       memlet, or the maximum number of elements if
                       ``dynamic`` is set to True. If dynamic and this
                       value is set to zero, the number of elements moved
                       is runtime-defined and unbounded.
        :param dynamic: If True, the number of elements moved in this memlet
                        is defined dynamically at runtime.
        :param wcr: A lambda function (represented as a string or Python AST)
                    specifying how write-conflicts are resolved. The lambda
                    function receives two elements: the ``current`` value and
                    the ``new`` value, and returns the value after resolution.
                    For example, summation is represented by
                    ``'lambda cur, new: cur + new'``.
        :param debuginfo: Line information from the generating source code.
        :param wcr_nonatomic: If True, overrides the automatic code generator 
                              decision and treat all write-conflict resolution
                              operations as non-atomic, which might cause race
                              conditions in the general case.
        :param allow_oob: If True, bypasses the checks in SDFG validation for
                          out-of-bounds accesses in memlet subsets.
        """

        # Will be set once memlet is added into an SDFG (in try_initialize)
        self._sdfg = None
        self._state = None
        self._edge = None

        # Field caching which subset belongs to source or destination of memlet
        self._is_data_src = None

        # Initialize first by string expression
        self.data = None
        self.subset = None
        self.other_subset = None
        if expr is not None:
            self._parse_memlet_from_str(expr)

        # Set properties
        self.data = self.data or data
        self.subset = self.subset or subset
        self.other_subset = self.other_subset or other_subset

        if volume is not None:
            self.volume = volume
        else:
            if self.subset is not None:
                self.volume = self.subset.num_elements()
            elif self.other_subset is not None:
                self.volume = self.other_subset.num_elements()
            else:
                self.volume = 1

        self.dynamic = dynamic
        self.wcr = wcr
        self.wcr_nonatomic = wcr_nonatomic
        self.debuginfo = debuginfo
        self.allow_oob = allow_oob

    def to_json(self):
        attrs = dace.serialize.all_properties_to_json(self)

        # Fill in new values
        if self.src_subset is not None:
            attrs['src_subset'] = self.src_subset.to_json()
        else:
            attrs['src_subset'] = None
        if self.dst_subset is not None:
            attrs['dst_subset'] = self.dst_subset.to_json()
        else:
            attrs['dst_subset'] = None

        # Fill in legacy (DEPRECATED) values for backwards compatibility
        attrs['num_accesses'] = \
            str(self.volume) if not self.dynamic else -1

        return {"type": "Memlet", "attributes": attrs}

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Memlet()
        dace.serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'src_subset', 'dst_subset', 'num_accesses'})
        if context:
            ret._sdfg = context['sdfg']
            ret._state = context['sdfg_state']
        return ret

    def __deepcopy__(self, memo):
        node = object.__new__(Memlet)

        # Set properties
        node.volume = dcpy(self.volume, memo=memo)
        node._dynamic = self._dynamic
        node.subset = dcpy(self.subset, memo=memo)
        node.other_subset = dcpy(self.other_subset, memo=memo)
        node.data = dcpy(self.data, memo=memo)
        node.wcr = dcpy(self.wcr, memo=memo)
        node.debuginfo = dcpy(self.debuginfo, memo=memo)
        node._wcr_nonatomic = self._wcr_nonatomic
        node._allow_oob = self._allow_oob
        node._is_data_src = self._is_data_src

        # Nullify graph references
        node._sdfg = None
        node._state = None
        node._edge = None

        return node

    def is_empty(self) -> bool:
        """ 
        Returns True if this memlet carries no data. Memlets without data are
        primarily used for connecting nodes to scopes without transferring 
        data to them. 
        """
        return (self.data is None and self.src_subset is None
                and self.dst_subset is None)

    @property
    def num_accesses(self):
        """ 
        Returns the total memory movement volume (in elements) of this memlet.
        """
        return self.volume

    @num_accesses.setter
    def num_accesses(self, value):
        self.volume = value

    @staticmethod
    def simple(data,
               subset_str,
               wcr_str=None,
               other_subset_str=None,
               wcr_conflict=True,
               num_accesses=None,
               debuginfo=None,
               dynamic=False):
        """ DEPRECATED: Constructs a Memlet from string-based expressions.
            :param data: The data object or name to access. 
            :type data: Either a string of the data descriptor name or an
                        AccessNode.
            :param subset_str: The subset of `data` that is going to
                               be accessed in string format. Example: '0:N'.
            :param wcr_str: A lambda function (as a string) specifying
                            how write-conflicts are resolved. The syntax
                            of the lambda function receives two elements:
                            `current` value and `new` value,
                            and returns the value after resolution. For
                            example, summation is
                            `'lambda cur, new: cur + new'`.
            :param other_subset_str: The reindexing of `subset` on the other
                                     connected data (as a string).
            :param wcr_conflict: If False, forces non-locked conflict
                                 resolution when generating code. The default
                                 is to let the code generator infer this
                                 information from the SDFG.
            :param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 -1, designates that the number of accesses is
                                 unknown at compile time.
            :param debuginfo: Source-code information (e.g., line, file)
                              used for debugging.
            :param dynamic: If True, the number of elements moved in this memlet
                            is defined dynamically at runtime.
        """
        # warnings.warn(
        #     'This function is deprecated, please use the Memlet '
        #     'constructor instead', DeprecationWarning)

        result = Memlet()

        if isinstance(subset_str, subsets.Subset):
            result.subset = subset_str
        else:
            result.subset = SubsetProperty.from_string(subset_str)

        result.dynamic = dynamic

        if num_accesses is not None:
            if num_accesses == -1:
                result.dynamic = True
                result.volume = 0
            else:
                result.volume = num_accesses
        else:
            result.volume = result.subset.num_elements()

        if wcr_str is not None:
            if isinstance(wcr_str, ast.AST):
                result.wcr = wcr_str
            else:
                result.wcr = LambdaProperty.from_string(wcr_str)

        if other_subset_str is not None:
            if isinstance(other_subset_str, subsets.Subset):
                result.other_subset = other_subset_str
            else:
                result.other_subset = SubsetProperty.from_string(
                    other_subset_str)
        else:
            result.other_subset = None

        # If it is an access node or another memlet
        if hasattr(data, 'data'):
            result.data = data.data
        else:
            result.data = data

        result.wcr_nonatomic = not wcr_conflict

        return result

    def _parse_from_subexpr(self, expr: str):
        if expr[-1] != ']':  # No subset given, try to use whole array
            if not dtypes.validate_name(expr):
                raise SyntaxError('Invalid memlet syntax "%s"' % expr)
            return expr, None

        # array[subset] syntax
        arrname, subset_str = expr[:-1].split('[')
        if not dtypes.validate_name(arrname):
            raise SyntaxError('Invalid array name "%s" in memlet' % arrname)
        return arrname, SubsetProperty.from_string(subset_str)

    def _parse_memlet_from_str(self, expr: str):
        """
        Parses a memlet from a string expression, filling in the data,
        subset, and other_subset fields as appropriate.
        :param expr: A string expression of this memlet, provided as an
                ease-of-use API. Must follow one of the following forms:
                1. ``ARRAY``,
                2. ``ARRAY[SUBSET]``,
                3. ``ARRAY[SUBSET] -> OTHER_SUBSET``.
                Note that modes 2 and 3 are deprecated and will leave 
                the memlet uninitialized until inserted into an SDFG.
        """
        expr = expr.strip()
        if '->' not in expr:  # Options 1 and 2
            self.data, self.subset = self._parse_from_subexpr(expr)
            return

        # Option 3
        src_expr, dst_expr = expr.split('->')
        src_expr = src_expr.strip()
        dst_expr = dst_expr.strip()
        if '[' not in src_expr and not dtypes.validate_name(src_expr):
            raise SyntaxError('Expression without data name not yet allowed')

        self.data, self.subset = self._parse_from_subexpr(src_expr)
        self.other_subset = SubsetProperty.from_string(dst_expr)

    def try_initialize(self, sdfg: 'dace.sdfg.SDFG',
                       state: 'dace.sdfg.SDFGState',
                       edge: 'dace.sdfg.graph.MultiConnectorEdge'):
        """ 
        Tries to initialize the internal fields of the memlet (e.g., src/dst 
        subset) once it is added to an SDFG as an edge.
        """
        from dace.sdfg.nodes import AccessNode, CodeNode  # Avoid import loops
        self._sdfg = sdfg
        self._state = state
        self._edge = edge

        # If memlet is code->code, ensure volume=1
        if (isinstance(edge.src, CodeNode) and isinstance(edge.dst, CodeNode)
                and self.volume == 0):
            self.volume = 1

        # Find source/destination of memlet
        try:
            path = state.memlet_path(edge)
        except (ValueError, AssertionError, StopIteration):
            # Cannot initialize yet
            return

        is_data_src = True
        if isinstance(path[-1].dst, AccessNode):
            if path[-1].dst.data == self._data:
                is_data_src = False
        self._is_data_src = is_data_src

        # If subset is None, fill in with entire array
        if (self.data is not None and self.subset is None):
            self.subset = subsets.Range.from_array(sdfg.arrays[self.data])

    @staticmethod
    def from_array(dataname, datadesc, wcr=None):
        """ Constructs a Memlet that transfers an entire array's contents.
            :param dataname: The name of the data descriptor in the SDFG.
            :param datadesc: The data descriptor object.
            :param wcr: The conflict resolution lambda.
            :type datadesc: Data
        """
        rng = subsets.Range.from_array(datadesc)
        return Memlet.simple(dataname, rng, wcr_str=wcr)

    def __hash__(self):
        return hash(
            (self.volume, self.src_subset, self.dst_subset, str(self.wcr)))

    def __eq__(self, other):
        return all([
            self.volume == other.volume, self.src_subset == other.src_subset,
            self.dst_subset == other.dst_subset, self.wcr == other.wcr
        ])

    def replace(self, repl_dict):
        """ Substitute a given set of symbols with a different set of symbols.
            :param repl_dict: A dict of string symbol names to symbols with
                              which to replace them.
        """
        repl_to_intermediate = {}
        repl_to_final = {}
        for symbol in repl_dict:
            if str(symbol) != str(repl_dict[symbol]):
                intermediate = symbolic.symbol('__dacesym_' + str(symbol))
                repl_to_intermediate[symbolic.symbol(symbol)] = intermediate
                repl_to_final[intermediate] = repl_dict[symbol]

        if len(repl_to_intermediate) > 0:
            if self.volume is not None and symbolic.issymbolic(self.volume):
                self.volume = self.volume.subs(repl_to_intermediate)
                self.volume = self.volume.subs(repl_to_final)
            if self.subset is not None:
                self.subset.replace(repl_to_intermediate)
                self.subset.replace(repl_to_final)
            if self.other_subset is not None:
                self.other_subset.replace(repl_to_intermediate)
                self.other_subset.replace(repl_to_final)

    def num_elements(self):
        """ Returns the number of elements in the Memlet subset. """
        if self.subset:
            return self.subset.num_elements()
        elif self.other_subset:
            return self.other_subset.num_elements()
        return 0

    def bounding_box_size(self):
        """ Returns a per-dimension upper bound on the maximum number of
            elements in each dimension.

            This bound will be tight in the case of Range.
        """
        if self.src_subset:
            return self.src_subset.bounding_box_size()
        elif self.dst_subset:
            return self.dst_subset.bounding_box_size()
        return []

    # New fields
    @property
    def src_subset(self):
        if self._is_data_src is not None:
            return self.subset if self._is_data_src else self.other_subset
        return self.subset

    @src_subset.setter
    def src_subset(self, new_src_subset):
        if self._is_data_src is not None:
            if self._is_data_src:
                self.subset = new_src_subset
            else:
                self.other_subset = new_src_subset
        else:
            self.subset = new_src_subset

    @property
    def dst_subset(self):
        if self._is_data_src is not None:
            return self.other_subset if self._is_data_src else self.subset
        return self.other_subset

    @dst_subset.setter
    def dst_subset(self, new_dst_subset):
        if self._is_data_src is not None:
            if self._is_data_src:
                self.other_subset = new_dst_subset
            else:
                self.subset = new_dst_subset
        else:
            self.other_subset = new_dst_subset

    def validate(self, sdfg, state):
        if self.data is not None and self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    @property
    def free_symbols(self) -> Set[str]:
        """ Returns a set of symbols used in this edge's properties. """
        # Symbolic properties are in volume, and the two subsets
        result = set()
        result |= set(map(str, self.volume.free_symbols))
        if self.src_subset:
            result |= self.src_subset.free_symbols
        if self.dst_subset:
            result |= self.dst_subset.free_symbols
        return result

    def __label__(self, sdfg, state):
        """ Returns a string representation of the memlet for display in a
            graph.

            :param sdfg: The SDFG in which the memlet resides.
            :param state: An SDFGState object in which the memlet resides.
        """
        if self.data is None:
            return self._label(None)
        return self._label(sdfg.arrays[self.data].shape)

    def __str__(self):
        return self._label(None)

    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.dynamic:
            result += '(dyn) '
        elif self.volume != num_elements:
            result += '(%s) ' % SymbolicProperty.to_string(self.volume)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't print the subset when accessing a single element
                # at index zero
                if all(s == 0 for s in self.subset.min_element()):
                    arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == dtypes.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s)' % wcrstr

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result

    def __repr__(self):
        return "Memlet (" + self.__str__() + ")"
Example 8
class GPUTransformSDFG(transformation.Transformation):
    """ Implements the GPUTransformSDFG transformation.

        Transforms a whole SDFG to run on the GPU. Steps of the full GPU
        transform:
          0. Acquire metadata about SDFG and arrays
          1. Replace all non-transients with their GPU counterparts
          2. Copy-in state from host to GPU
          3. Copy-out state from GPU to host
          4. Re-store Default-top/CPU_Heap transients as GPU_Global
          5. Global tasklets are wrapped with a map of size 1
          6. Global Maps are re-scheduled to use the GPU
          7. Make data ready for interstate edges that use them
          8. Re-apply strict transformations to get rid of extra states and
             transients
    """

    toplevel_trans = Property(desc="Make all GPU transients top-level",
                              dtype=bool,
                              default=True)

    register_trans = Property(
        desc="Make all transients inside GPU maps registers",
        dtype=bool,
        default=True)

    sequential_innermaps = Property(desc="Make all internal maps Sequential",
                                    dtype=bool,
                                    default=True)

    skip_scalar_tasklets = Property(desc="If True, does not transform tasklets "
                                    "that manipulate (Default-stored) scalars",
                                    dtype=bool,
                                    default=True)

    strict_transform = Property(
        desc='Reapply strict transformations after modifying graph',
        dtype=bool,
        default=True)

    exclude_copyin = Property(
        desc="Exclude these arrays from being copied into the device "
        "(comma-separated)",
        dtype=str,
        default='')

    exclude_tasklets = Property(
        desc="Exclude these tasklets from being processed as CPU tasklets "
        "(comma-separated)",
        dtype=str,
        default='')

    exclude_copyout = Property(
        desc="Exclude these arrays from being copied out of the device "
        "(comma-separated)",
        dtype=str,
        default='')

    @staticmethod
    def annotates_memlets():
        # Skip memlet propagation for now
        return True

    @staticmethod
    def expressions():
        # Matches anything
        return [sd.SDFG('_')]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        for node, _ in sdfg.all_nodes_recursive():
            # Consume scopes are currently unsupported
            if isinstance(node, (nodes.ConsumeEntry, nodes.ConsumeExit)):
                return False

        for state in sdfg.nodes():
            schildren = state.scope_children()
            for node in schildren[None]:
                # If two top-level tasklets are connected with a code->code
                # memlet, they will transform into an invalid SDFG
                if (isinstance(node, nodes.CodeNode) and any(
                        isinstance(e.dst, nodes.CodeNode)
                        for e in state.out_edges(node))):
                    return False
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return graph.label

    def apply(self, sdfg: sd.SDFG):

        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        input_nodes = []
        output_nodes = []
        global_code_nodes: Dict[sd.SDFGState, List[nodes.Tasklet]] = defaultdict(list)

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(sdfg).transient):
                    if (state.out_degree(node) > 0
                            and node.data not in input_nodes):
                        # Special case: nodes that lead to top-level dynamic
                        # map ranges must stay on host
                        for e in state.out_edges(node):
                            last_edge = state.memlet_path(e)[-1]
                            if (isinstance(last_edge.dst, nodes.EntryNode)
                                    and last_edge.dst_conn
                                    and not last_edge.dst_conn.startswith('IN_')
                                    and sdict[last_edge.dst] is None):
                                break
                        else:
                            input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_nodes):
                        output_nodes.append((node.data, node.desc(sdfg)))

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None:
                    if (e.data.data not in input_nodes
                            and not sdfg.arrays[e.data.data].transient):
                        input_nodes.append(
                            (e.data.data, sdfg.arrays[e.data.data]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in set(input_nodes):
            if isinstance(inode, data.Scalar):  # Scalars can remain on host
                continue
            if inode.storage == dtypes.StorageType.GPU_Global:
                continue
            newdesc = inode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + inodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[inodename] = name

        for onodename, onode in set(output_nodes):
            if onodename in cloned_arrays:
                continue
            if onode.storage == dtypes.StorageType.GPU_Global:
                continue
            newdesc = onode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + onodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[onodename] = name

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state
        excluded_copyin = self.exclude_copyin.split(',')

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, sd.InterstateEdge())

        for nname, desc in dtypes.deduplicate(input_nodes):
            if nname in excluded_copyin or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data, src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state
        excluded_copyout = self.exclude_copyout.split(',')

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, sd.InterstateEdge())

        for nname, desc in dtypes.deduplicate(output_nodes):
            if nname in excluded_copyout or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data, dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        const_syms = xfh.constant_symbols(sdfg)

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.desc(sdfg).transient):
                    nodedesc = node.desc(sdfg)

                    # Special case: nodes that lead to dynamic map ranges must
                    # stay on host
                    if any(
                            isinstance(
                                state.memlet_path(e)[-1].dst, nodes.EntryNode)
                            for e in state.out_edges(node)):
                        continue

                    gpu_storage = [
                        dtypes.StorageType.GPU_Global,
                        dtypes.StorageType.GPU_Shared,
                        dtypes.StorageType.CPU_Pinned
                    ]
                    if (sdict[node] is None
                            and nodedesc.storage not in gpu_storage):
                        # NOTE: the cloned arrays match too but it's the same
                        # storage so we don't care
                        nodedesc.storage = dtypes.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        dsyms = set(map(str, nodedesc.free_symbols))
                        if (self.toplevel_trans
                                and not isinstance(nodedesc, (data.Stream,
                                                              data.View))
                                and len(dsyms - const_syms) == 0):
                            nodedesc.lifetime = dtypes.AllocationLifetime.SDFG
                    elif nodedesc.storage not in gpu_storage:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = dtypes.StorageType.Register

        #######################################################
        # Step 5: Change all top-level maps and library nodes to GPU schedule

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if sdict[node] is None:
                    if isinstance(node, (nodes.LibraryNode, nodes.NestedSDFG)):
                        node.schedule = dtypes.ScheduleType.GPU_Default
                    elif isinstance(node, nodes.EntryNode):
                        node.schedule = dtypes.ScheduleType.GPU_Device
                elif self.sequential_innermaps:
                    if isinstance(node, (nodes.EntryNode, nodes.LibraryNode)):
                        node.schedule = dtypes.ScheduleType.Sequential
                    elif isinstance(node, nodes.NestedSDFG):
                        for nnode, _ in node.sdfg.all_nodes_recursive():
                            if isinstance(nnode,
                                          (nodes.EntryNode, nodes.LibraryNode)):
                                nnode.schedule = dtypes.ScheduleType.Sequential

        #######################################################
        # Step 6: Wrap free tasklets and nested SDFGs with a GPU map

        # Collect free tasklets
        for node, state in sdfg.all_nodes_recursive():
            if isinstance(node, nodes.Tasklet):
                if (state.entry_node(node) is None
                        and not scope.is_devicelevel_gpu(
                            state.parent, state, node, with_gpu_default=True)):
                    global_code_nodes[state].append(node)

        for state, gcodes in global_code_nodes.items():
            for gcode in gcodes:
                if gcode.label in self.exclude_tasklets.split(','):
                    continue
                # Create map and connectors
                me, mx = state.add_map(gcode.label + '_gmap',
                                       {gcode.label + '__gmapi': '0:1'},
                                       schedule=dtypes.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get corrupted
                # when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = {('IN_' + e.dst_conn): None
                                    for e in in_edges}
                me.out_connectors = {('OUT_' + e.dst_conn): None
                                     for e in in_edges}
                mx.in_connectors = {('IN_' + e.src_conn): None
                                    for e in out_edges}
                mx.out_connectors = {('OUT_' + e.src_conn): None
                                     for e in out_edges}

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me, 'IN_' + e.dst_conn,
                                   e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst, e.dst_conn,
                                   e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx, 'IN_' + e.src_conn,
                                   e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst, e.dst_conn,
                                   e.data)

                # Map without inputs
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.Memlet())
        #######################################################
        # Step 7: Introduce copy-out if data used in outgoing interstate edges

        for state in list(sdfg.nodes()):
            arrays_used = set()
            for e in sdfg.out_edges(state):
                # Used arrays = intersection between symbols and cloned arrays
                arrays_used.update(
                    set(e.data.free_symbols)
                    & set(cloned_arrays.keys()))

            # Create a state and copy out used arrays
            if len(arrays_used) > 0:
                co_state = sdfg.add_state(state.label + '_icopyout')

                # Reconnect outgoing edges to after interim copyout state
                for e in sdfg.out_edges(state):
                    sdutil.change_edge_src(sdfg, state, co_state)
                # Add unconditional edge to interim state
                sdfg.add_edge(state, co_state, sd.InterstateEdge())

                # Add copy-out nodes
                for nname in arrays_used:
                    desc = sdfg.arrays[nname]
                    src_array = nodes.AccessNode(cloned_arrays[nname],
                                                 debuginfo=desc.debuginfo)
                    dst_array = nodes.AccessNode(nname,
                                                 debuginfo=desc.debuginfo)
                    co_state.add_node(src_array)
                    co_state.add_node(dst_array)
                    co_state.add_nedge(
                        src_array, dst_array,
                        memlet.Memlet.from_array(dst_array.data,
                                                 dst_array.desc(sdfg)))

        #######################################################
        # Step 8: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        sdfg.apply_strict_transformations()
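
The apply method above is not usually invoked by hand; a minimal usage sketch of the typical entry point follows (hedged: it assumes the class above is DaCe's GPUTransformSDFG and that `dace` is importable; the program itself is illustrative).

import dace
from dace.transformation.interstate import GPUTransformSDFG

@dace.program
def scale(A: dace.float64[1024], B: dace.float64[1024]):
    for i in dace.map[0:1024]:
        B[i] = 2.0 * A[i] + B[i]

sdfg = scale.to_sdfg()
# Apply the whole-SDFG transformation once; `options` sets the Properties
# declared on the class (here, skipping the final strict-transformation pass).
sdfg.apply_transformations(GPUTransformSDFG,
                           options={'strict_transform': False})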
Example 9
class Reduce(dace.sdfg.nodes.LibraryNode):
    """ An SDFG node that reduces an N-dimensional array to an
        (N-k)-dimensional array, with a list of axes to reduce and
        a reduction binary function. """

    # Global properties
    implementations = {
        'pure': ExpandReducePure,
        'OpenMP': ExpandReduceOpenMP,
        'CUDA (device)': ExpandReduceCUDADevice,
        'CUDA (block)': ExpandReduceCUDABlock,
        'CUDA (block allreduce)': ExpandReduceCUDABlockAll,
        'FPGAPartialReduction': ExpandReduceFPGAPartialReduction
        # 'CUDA (warp)': ExpandReduceCUDAWarp,
        # 'CUDA (warp allreduce)': ExpandReduceCUDAWarpAll
    }

    default_implementation = 'pure'

    # Properties
    axes = ListProperty(element_type=int, allow_none=True)
    wcr = LambdaProperty(default='lambda a, b: a')
    identity = Property(allow_none=True)

    def __init__(self,
                 wcr='lambda a, b: a',
                 axes=None,
                 identity=None,
                 schedule=dtypes.ScheduleType.Default,
                 debuginfo=None,
                 **kwargs):
        super().__init__(name='Reduce', **kwargs)
        self.wcr = wcr
        self.axes = axes
        self.identity = identity
        self.debuginfo = debuginfo
        self.schedule = schedule

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Reduce("lambda a, b: a", None)
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        return ret

    def __str__(self):
        # Autodetect reduction type
        redtype = detect_reduction_type(self.wcr)
        if redtype == dtypes.ReductionType.Custom:
            wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
        else:
            wcrstr = str(redtype)
            wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

        return 'Reduce ({op}), Axes: {axes}'.format(
            axes=('all' if self.axes is None else str(self.axes)), op=wcrstr)

    def __label__(self, sdfg, state):
        return str(self).replace(' Axes', '\nAxes')

    def validate(self, sdfg, state):
        if len(state.in_edges(self)) != 1:
            raise ValueError('Reduce node must have one input')
        if len(state.out_edges(self)) != 1:
            raise ValueError('Reduce node must have one output')
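
A minimal usage sketch for this library node (hedged: the SDFG, array names, and edge wiring are illustrative; `Reduce` refers to the class above and `dace` is assumed importable).

import dace

sdfg = dace.SDFG('reduce_example')
sdfg.add_array('A', [10, 10], dace.float64)
sdfg.add_array('out', [10], dace.float64)
state = sdfg.add_state()

# Sum over axis 0, using one of the registered expansions above
red = Reduce(wcr='lambda a, b: a + b', axes=[0], identity=0)
red.implementation = 'pure'
state.add_node(red)

state.add_nedge(state.add_read('A'), red,
                dace.Memlet.from_array('A', sdfg.arrays['A']))
state.add_nedge(red, state.add_write('out'),
                dace.Memlet.from_array('out', sdfg.arrays['out']))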
Example 10
class GPUTransformState(pattern_matching.Transformation):
    """ Implements the GPUTransformState transformation.

        Transforms a whole SDFG to run on the GPU:
        Steps of the full GPU transform
          0. Acquire metadata about SDFG and arrays
          1. Replace all non-transients with their GPU counterparts
          2. Copy-in state from host to GPU
          3. Copy-out state from GPU to host
          4. Re-store Default-top/CPU_Heap transients as GPU_Global
          5. Global tasklets are wrapped with a map of size 1
          6. Global Maps are re-scheduled to use the GPU
          7. Re-apply strict transformations to get rid of extra states and 
             transients
    """

    toplevel_trans = Property(desc="Make all GPU transients top-level",
                              dtype=bool,
                              default=True)
    register_trans = Property(
        desc="Make all transients inside GPU maps registers",
        dtype=bool,
        default=True)
    sequential_innermaps = Property(desc="Make all internal maps Sequential",
                                    dtype=bool,
                                    default=True)
    strict_transform = Property(
        desc='Reapply strict transformations after modifying graph',
        dtype=bool,
        default=True)

    @staticmethod
    def annotates_memlets():
        # Skip memlet propagation for now
        return True

    @staticmethod
    def expressions():
        # Matches anything
        return [sd.SDFG('_')]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return graph.label

    def modifies_graph(self):
        return True

    def apply(self, sdfg: sd.SDFG):

        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        input_nodes = []
        output_nodes = []
        global_code_nodes = [[] for _ in sdfg.nodes()]

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(sdfg).transient):
                    names_in = [n for n, _ in input_nodes]
                    names_out = [n for n, _ in output_nodes]
                    if (state.out_degree(node) > 0
                            and node.data not in names_in):
                        input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in names_out):
                        output_nodes.append((node.data, node.desc(sdfg)))
                elif isinstance(node, nodes.CodeNode) and sdict[node] is None:
                    if not isinstance(node, nodes.EmptyTasklet):
                        global_code_nodes[i].append(node)

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None and e.data.wcr_identity is None:
                    if (e.data.data not in [n for n, _ in input_nodes]
                            and not sdfg.arrays[e.data.data].transient):
                        input_nodes.append(
                            (e.data.data, sdfg.arrays[e.data.data]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in input_nodes:
            newdesc = inode.clone()
            newdesc.storage = types.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + inodename, newdesc)
            cloned_arrays[inodename] = 'gpu_' + inodename

        for onodename, onode in output_nodes:
            if onodename in cloned_arrays:
                continue
            newdesc = onode.clone()
            newdesc.storage = types.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + onodename, newdesc)
            cloned_arrays[onodename] = 'gpu_' + onodename

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, ed.InterstateEdge())

        for nname, desc in input_nodes:
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data, src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, ed.InterstateEdge())

        for nname, desc in output_nodes:
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data, dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node,
                              nodes.AccessNode) and node.desc(sdfg).transient:
                    nodedesc = node.desc(sdfg)
                    if sdict[node] is None:
                        # NOTE: the cloned arrays match too but it's the same
                        # storage so we don't care
                        nodedesc.storage = types.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        if self.toplevel_trans:
                            nodedesc.toplevel = True
                    else:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = types.StorageType.Register

        #######################################################
        # Step 5: Wrap free tasklets and nested SDFGs with a GPU map

        for state, gcodes in zip(sdfg.nodes(), global_code_nodes):
            for gcode in gcodes:
                # Create map and connectors
                me, mx = state.add_map(gcode.label + '_gmap',
                                       {gcode.label + '__gmapi': '0:1'},
                                       schedule=types.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get corrupted
                # when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = set('IN_' + e.dst_conn for e in in_edges)
                me.out_connectors = set('OUT_' + e.dst_conn for e in in_edges)
                mx.in_connectors = set('IN_' + e.src_conn for e in out_edges)
                mx.out_connectors = set('OUT_' + e.src_conn for e in out_edges)

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me, 'IN_' + e.dst_conn,
                                   e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst, e.dst_conn,
                                   e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx, 'IN_' + e.src_conn,
                                   e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst, e.dst_conn,
                                   e.data)

                # Map without inputs
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.EmptyMemlet())
        #######################################################
        # Step 6: Change all top-level maps to GPU maps

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node, nodes.EntryNode):
                    if sdict[node] is None:
                        node.schedule = types.ScheduleType.GPU_Device
                    elif self.sequential_innermaps:
                        node.schedule = types.ScheduleType.Sequential

        #######################################################
        # Step 7: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        opt = optimizer.SDFGOptimizer(sdfg, inplace=True)
        fusions = 0
        arrays = 0
        options = [
            match for match in opt.get_pattern_matches(strict=True)
            if isinstance(match, (StateFusion, RedundantArray))
        ]
        while options:
            ssdfg = sdfg.sdfg_list[options[0].sdfg_id]
            options[0].apply(ssdfg)
            ssdfg.validate()
            if isinstance(options[0], StateFusion):
                fusions += 1
            if isinstance(options[0], RedundantArray):
                arrays += 1

            options = [
                match for match in opt.get_pattern_matches(strict=True)
                if isinstance(match, (StateFusion, RedundantArray))
            ]

        if Config.get_bool('debugprint') and (fusions > 0 or arrays > 0):
            print('Automatically applied {} strict state fusions and removed'
                  ' {} redundant arrays.'.format(fusions, arrays))
Example 11
class MapTiling(transformation.Transformation):
    """ Implements the orthogonal tiling transformation.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map.
    """

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    # Properties
    prefix = Property(dtype=str,
                      default="tile",
                      desc="Prefix for new range symbols")
    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")

    strides = ShapeProperty(
        dtype=tuple,
        default=tuple(),
        desc="Tile stride (enables overlapping tiles). If empty, matches tile")

    tile_offset = ShapeProperty(dtype=tuple,
                                default=None,
                                desc="Negative Stride offset per dimension",
                                allow_none=True)

    divides_evenly = Property(dtype=bool,
                              default=False,
                              desc="Tile size divides dimension length evenly")
    tile_trivial = Property(dtype=bool,
                            default=False,
                            desc="Tiles even if tile_size is 1")

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        return [sdutil.node_path_graph(MapTiling._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[MapTiling._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]

        tile_strides = self.tile_sizes
        if self.strides is not None and len(self.strides) == len(tile_strides):
            tile_strides = self.strides

        # Retrieve map entry and exit nodes.
        map_entry = graph.nodes()[self.subgraph[MapTiling._map_entry]]
        from dace.transformation.dataflow.map_collapse import MapCollapse
        from dace.transformation.dataflow.strip_mining import StripMining
        stripmine_subgraph = {
            StripMining._map_entry: self.subgraph[MapTiling._map_entry]
        }
        sdfg_id = sdfg.sdfg_id
        last_map_entry = None
        removed_maps = 0

        original_schedule = map_entry.schedule

        for dim_idx in range(len(map_entry.map.params)):
            if dim_idx >= len(self.tile_sizes):
                tile_size = symbolic.pystr_to_symbolic(self.tile_sizes[-1])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[-1])
            else:
                tile_size = symbolic.pystr_to_symbolic(
                    self.tile_sizes[dim_idx])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[dim_idx])

            # handle offsets
            if self.tile_offset and dim_idx >= len(self.tile_offset):
                offset = self.tile_offset[-1]
            elif self.tile_offset:
                offset = self.tile_offset[dim_idx]
            else:
                offset = 0

            dim_idx -= removed_maps
            # If the tile covers this entire map dimension, skip strip-mining
            if tile_size == map_entry.map.range.size()[dim_idx]:
                continue

            stripmine = StripMining(sdfg_id, self.state_id, stripmine_subgraph,
                                    self.expr_index)

            # Special case: Tile size of 1 should be omitted from inner map
            if tile_size == 1 and tile_stride == 1 and not self.tile_trivial:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = ''
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = True
                stripmine.tile_offset = str(offset)
                stripmine.apply(sdfg)
                removed_maps += 1
            else:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = self.prefix
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = self.divides_evenly
                stripmine.tile_offset = str(offset)
                stripmine.apply(sdfg)

            # apply to the new map the schedule of the original one
            map_entry.schedule = original_schedule

            if last_map_entry:
                new_map_entry = graph.in_edges(map_entry)[0].src
                mapcollapse_subgraph = {
                    MapCollapse._outer_map_entry:
                    graph.node_id(last_map_entry),
                    MapCollapse._inner_map_entry: graph.node_id(new_map_entry)
                }
                mapcollapse = MapCollapse(sdfg_id, self.state_id,
                                          mapcollapse_subgraph, 0)
                mapcollapse.apply(sdfg)
            last_map_entry = graph.in_edges(map_entry)[0].src
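
A usage sketch for the tiling transformation (hedged: `some_program` is a placeholder for any @dace.program; the tile sizes are illustrative).

import dace
from dace.transformation.dataflow import MapTiling

sdfg = some_program.to_sdfg()  # some_program: placeholder @dace.program
# Tile each matched top-level map with 32x32 tiles; `tile_sizes` sets the
# ShapeProperty declared on the class above.
sdfg.apply_transformations(MapTiling, options={'tile_sizes': (32, 32)})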
Example 12
class Transformation(object):
    """ Base class for transformations, as well as a static registry of 
        transformations, where new transformations can be added in a 
        decentralized manner.
    """

    ####################################################################
    # Transformation registry

    # Class attributes

    _patterns = set()
    _stateflow_patterns = set()

    # Static methods

    @staticmethod
    def patterns():
        """ Returns a list of single-state (dataflow) transformations 
            currently in the registry. """

        pattern_list = sorted(Transformation._patterns,
                              key=lambda cls: cls.__name__)
        return pattern_list

    @staticmethod
    def stateflow_patterns():
        """ Returns a list of multiple-state (interstate) transformations 
            currently in the registry. """

        pattern_list = sorted(Transformation._stateflow_patterns,
                              key=lambda cls: cls.__name__)
        return pattern_list

    @staticmethod
    def register_pattern(clazz):
        """ Registers a single-state (dataflow) transformation in the registry.
            @param clazz: The Transformation class type.
        """

        if not issubclass(clazz, Transformation):
            raise TypeError
        Transformation._patterns.add(clazz)

    @staticmethod
    def register_stateflow_pattern(clazz):
        """ Registers a multi-state transformation in the registry.
            @param clazz: The Transformation class type.
        """

        if not issubclass(clazz, Transformation):
            raise TypeError
        Transformation._stateflow_patterns.add(clazz)

    @staticmethod
    def register_pattern_file(filename):
        """ Registers all transformations in a single Python file. """

        pattern_members = {}
        with open(filename) as pattern_file:
            exec(pattern_file.read(), pattern_members)
        for member in pattern_members.values():
            if inspect.isclass(member) and issubclass(member, Transformation):
                Transformation.register_pattern(member)

    @staticmethod
    def deregister_pattern(clazz):
        """ De-registers a transformation.
            @param clazz: The Transformation class type.
        """

        if not issubclass(clazz, Transformation):
            raise TypeError
        Transformation._patterns.remove(clazz)

    ####################################################################
    # Static and object methods

    # Properties
    sdfg_id = Property(dtype=int, category="(Debug)")
    state_id = Property(dtype=int, category="(Debug)")
    subgraph = SubgraphProperty(dtype=dict, category="(Debug)")
    expr_index = Property(dtype=int, category="(Debug)")

    @staticmethod
    def annotates_memlets():
        """ Indicates whether the transformation annotates the edges it creates
            or modifies with the appropriate memlets. This determines
            whether to apply memlet propagation after the transformation.
        """

        return False

    @staticmethod
    def expressions():
        """ Returns a list of Graph objects that will be matched in the 
            subgraph isomorphism phase. Used as a pre-pass before calling 
            `can_be_applied`.
            @see Transformation.can_be_applied
        """

        raise NotImplementedError

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if this transformation can be applied on the candidate
            matched subgraph.
            @param graph: SDFGState object if this Transformation is 
                          single-state, or SDFG object otherwise.
            @param candidate: A mapping between node IDs returned from 
                              `Transformation.expressions` and the nodes in 
                              `graph`.
            @param expr_index: The list index from `Transformation.expressions`
                               that was matched.
            @param sdfg: If `graph` is an SDFGState, its parent SDFG. Otherwise
                         should be equal to `graph`.
            @return: True if the transformation can be applied.
        """
        raise NotImplementedError

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a string representation of the pattern match on the 
            candidate subgraph. Used when identifying matches in the console 
            UI.
        """
        raise NotImplementedError

    def __init__(self, sdfg_id, state_id, subgraph, expr_index):
        """ Initializes an instance of Transformation.
            @param sdfg_id: A unique ID of the SDFG.
            @param state_id: The node ID of the SDFG state, if applicable.
            @param subgraph: A mapping between node IDs returned from 
                             `Transformation.expressions` and the nodes in 
                             `graph`.
            @param expr_index: The list index from `Transformation.expressions`
                               that was matched.
            @raise TypeError: When transformation is not subclass of
                              Transformation.
            @raise TypeError: When state_id is not instance of int.
            @raise TypeError: When subgraph is not a dict of 
                              dace.graph.nodes.Node : int.
        """

        self.sdfg_id = sdfg_id
        self.state_id = state_id
        for value in subgraph.values():
            if not isinstance(value, int):
                raise TypeError('All values of the subgraph dictionary must '
                                'be instances of int.')
        self.subgraph = subgraph
        self.expr_index = expr_index

    def __lt__(self, other):
        """ Comparing two transformations by their class name and node IDs
            in match. Used for ordering transformations consistently.
        """
        if type(self) != type(other):
            return type(self).__name__ < type(other).__name__

        self_ids = iter(self.subgraph.values())
        other_ids = iter(other.subgraph.values())

        try:
            self_id = next(self_ids)
        except StopIteration:
            return True
        try:
            other_id = next(other_ids)
        except StopIteration:
            return False

        self_end = False

        while self_id is not None and other_id is not None:
            if self_id != other_id:
                return self_id < other_id
            try:
                self_id = next(self_ids)
            except StopIteration:
                self_end = True
            try:
                other_id = next(other_ids)
            except StopIteration:
                # Other ran out of IDs first, or both did (transformations
                # are equal): in either case, not less-than
                return False
            if self_end:
                return True

    def apply_pattern(self, sdfg):
        """ Applies this transformation on the given SDFG. """
        self.apply(sdfg)
        if not self.annotates_memlets():
            labeling.propagate_labels_sdfg(sdfg)

    def __str__(self):
        raise NotImplementedError

    def print_match(self, sdfg):
        """ Returns a string representation of the pattern match on the 
            given SDFG. Used for printing matches in the console UI.
        """
        if not isinstance(sdfg, dace.SDFG):
            raise TypeError("Expected SDFG, got: {}".format(
                type(sdfg).__name__))
        if self.state_id == -1:
            graph = sdfg
        else:
            graph = sdfg.nodes()[self.state_id]
        string = type(self).__name__ + ' in '
        string += type(self).match_to_str(graph, self.subgraph)
        return string

    @staticmethod
    def print_debuginfo():
        pass
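
A short sketch of how the static registry above is meant to be used (hedged: `MyFusion` is an illustrative stub, not a real transformation).

class MyFusion(Transformation):
    @staticmethod
    def expressions():
        return []  # A real transformation returns pattern graphs here

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return False  # Stub: never matches

    @staticmethod
    def match_to_str(graph, candidate):
        return 'MyFusion'

    def apply(self, sdfg):
        pass  # Stub: no modification

Transformation.register_pattern(MyFusion)       # Add to dataflow registry
assert MyFusion in Transformation.patterns()
Transformation.deregister_pattern(MyFusion)     # And remove it again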
Example 13
class GPUPersistentKernel(SubgraphTransformation):
    """
    This transformation takes a given subgraph of an SDFG and fuses the 
    given states into a single persistent GPU kernel. Before this transformation can
    be applied the SDFG needs to be transformed to run on the GPU (e.g. with
    the GPUTransformSDFG transformation).
    
    If applicable the transform removes the selected states from the original
    SDFG and places a `launch` state in its place. The removed states will be
    added to a nested SDFG in the launch state. If necessary guard states will
    be added in the nested SDFG, in order to make sure global assignments on
    Interstate edges will be performed in the kernel (this can be disabled with
    the `include_in_assignment` property).
    
    The given subgraph needs to fulfill the following properties to be fused:
    
     - All states in the selected subgraph need to fulfill the following:
        - access only GPU accessible memory
        - all concurrent DFGs inside the state are either sequential or inside
          a GPU_Device map.
     - the selected subgraph has a single point of entry in the form of a 
       single InterstateEdge entering the subgraph (i.e. there is at most one
       state (not part of the subgraph) from which the kernel is entered and
       exactly one state inside the subgraph from which the kernel starts
       execution)
     - the selected subgraph has a single point of exit in the form of a single
       state that is entered after the selected subgraph is left (There can be
       multiple states from which the kernel can be left, but all will leave to
       the same state outside the subgraph)
    """

    validate = Property(
        desc="Validate the SDFG and the nested SDFG",
        dtype=bool,
        default=True,
    )

    include_in_assignment = Property(
        desc="Wether to include global variable assignments of the edge going "
        "into the kernel inside the kernel or have it happen on the "
        "outside. If the assignment is needed in the kernel, it needs to "
        "be included.",
        dtype=bool,
        default=True,
    )

    kernel_prefix = Property(
        desc="Name of the kernel. If no value is given the kerenl will be "
        "refrenced as `kernel`, if a value is given the kernel will be "
        "named `<kernel_prefix>_kernel`. This is useful if multiple "
        "kernels are created.",
        dtype=str,
        default='',
    )

    @staticmethod
    def can_be_applied(sdfg: SDFG, subgraph: SubgraphView):

        if not set(subgraph.nodes()).issubset(set(sdfg.nodes())):
            return False

        # All states need to be GPU states
        for state in subgraph:
            if not GPUPersistentKernel.is_gpu_state(sdfg, state):
                return False

        # for now exactly one inner and one outer entry state
        entry_states_in, entry_states_out = \
            GPUPersistentKernel.get_entry_states(sdfg, subgraph)
        if len(entry_states_in) > 1 or len(entry_states_out) > 1:
            return False

        entry_state_in = entry_states_in.pop()
        if (len(entry_states_out) == 1
                and len(sdfg.edges_between(entry_states_out.pop(),
                                           entry_state_in)) > 1):
            return False

        # for now only one outside state allowed, multiple inner exit states
        # allowed
        _, exit_states_out = GPUPersistentKernel.get_exit_states(
            sdfg, subgraph)
        if len(exit_states_out) > 1:
            return False

        # check reachability
        front = [entry_state_in]
        reachable = {entry_state_in}

        while len(front) > 0:
            current = front.pop(0)
            unseen = [
                suc for suc in subgraph.successors(current)
                if suc not in reachable
            ]
            front += unseen
            reachable.update(unseen)

        if reachable != set(subgraph.nodes()):
            return False

        return True

    def apply(self, sdfg: SDFG):
        subgraph = self.subgraph_view(sdfg)
        if not self.can_be_applied(sdfg, subgraph):
            raise Exception('The given subgraph cannot be fused!')

        entry_states_in, entry_states_out = self.get_entry_states(
            sdfg, subgraph)
        _, exit_states_out = self.get_exit_states(sdfg, subgraph)

        entry_state_in = entry_states_in.pop()
        entry_state_out = entry_states_out.pop() \
            if len(entry_states_out) > 0 else None
        exit_state_out = exit_states_out.pop() \
            if len(exit_states_out) > 0 else None

        launch_state = None
        entry_guard_state = None
        exit_guard_state = None

        # generate entry guard state if needed
        if self.include_in_assignment and entry_state_out is not None:
            entry_edge = sdfg.edges_between(entry_state_out, entry_state_in)[0]
            if len(entry_edge.data.assignments) > 0:
                entry_guard_state = sdfg.add_state(
                    label='{}kernel_entry_guard'.format(
                        self.kernel_prefix +
                        '_' if self.kernel_prefix != '' else ''))
                sdfg.add_edge(entry_state_out, entry_guard_state,
                              InterstateEdge(entry_edge.data.condition))
                sdfg.add_edge(
                    entry_guard_state, entry_state_in,
                    InterstateEdge(None, entry_edge.data.assignments))
                sdfg.remove_edge(entry_edge)

                # Update SubgraphView
                new_node_list = subgraph.nodes()
                new_node_list.append(entry_guard_state)
                subgraph = SubgraphView(sdfg, new_node_list)

                launch_state = sdfg.add_state_before(
                    entry_guard_state,
                    label='{}kernel_launch'.format(
                        self.kernel_prefix +
                        '_' if self.kernel_prefix != '' else ''))

        # generate exit guard state
        if exit_state_out is not None:
            exit_guard_state = sdfg.add_state_before(
                exit_state_out,
                label='{}kernel_exit_guard'.format(
                    self.kernel_prefix +
                    '_' if self.kernel_prefix != '' else ''))

            # Update SubgraphView
            new_node_list = subgraph.nodes()
            new_node_list.append(exit_guard_state)
            subgraph = SubgraphView(sdfg, new_node_list)

            if launch_state is None:
                launch_state = sdfg.add_state_before(
                    exit_state_out,
                    label='{}kernel_launch'.format(
                        self.kernel_prefix +
                        '_' if self.kernel_prefix != '' else ''))

        # If the launch state does not exist at this point, there are no other
        # states outside of the kernel, so create a standalone launch state
        if launch_state is None:
            assert (entry_state_in is None and exit_state_out is None)
            launch_state = sdfg.add_state(label='{}kernel_launch'.format(
                self.kernel_prefix + '_' if self.kernel_prefix != '' else ''))

        # Create an SDFG for the kernel and fill it with the states and edges
        # of the subgraph; the kernel SDFG will be nested at the end
        kernel_sdfg = SDFG(
            '{}kernel'.format(self.kernel_prefix +
                              '_' if self.kernel_prefix != '' else ''))

        edges = subgraph.edges()
        for edge in edges:
            kernel_sdfg.add_edge(edge.src, edge.dst, edge.data)

        # Setting entry node in nested SDFG if no entry guard was created
        if entry_guard_state is None:
            kernel_sdfg.start_state = kernel_sdfg.node_id(entry_state_in)

        for state in subgraph:
            state.parent = kernel_sdfg

        # remove the now nested nodes from the outer sdfg and make sure the
        # launch state is properly connected to remaining states
        sdfg.remove_nodes_from(subgraph.nodes())

        if entry_state_out is not None \
                and len(sdfg.edges_between(entry_state_out, launch_state)) == 0:
            sdfg.add_edge(entry_state_out, launch_state, InterstateEdge())

        if exit_state_out is not None \
                and len(sdfg.edges_between(launch_state, exit_state_out)) == 0:
            sdfg.add_edge(launch_state, exit_state_out, InterstateEdge())

        # Handle data for kernel
        kernel_data = set(node.data for state in kernel_sdfg
                          for node in state.nodes()
                          if isinstance(node, nodes.AccessNode))

        # Move Streams and Register data into the nested SDFG;
        # normal data will be added as kernel arguments
        kernel_args = []
        for data in kernel_data:
            if (isinstance(sdfg.arrays[data], dace.data.Stream) or
                (isinstance(sdfg.arrays[data], dace.data.Array)
                 and sdfg.arrays[data].storage == StorageType.Register)):
                kernel_sdfg.add_datadesc(data, sdfg.arrays[data])
                del sdfg.arrays[data]
            else:
                copy_desc = copy.deepcopy(sdfg.arrays[data])
                copy_desc.transient = False
                copy_desc.storage = StorageType.Default
                kernel_sdfg.add_datadesc(data, copy_desc)
                kernel_args.append(data)

        # Read-only data will be passed as inputs; writable data will be
        # passed as 'outputs', since otherwise the kernel cannot write to it
        kernel_args_read = set()
        kernel_args_write = set()
        for data in kernel_args:
            data_accesses_read_only = [
                node.access == dtypes.AccessType.ReadOnly
                for state in kernel_sdfg for node in state
                if isinstance(node, nodes.AccessNode) and node.data == data
            ]
            if all(data_accesses_read_only):
                kernel_args_read.add(data)
            else:
                kernel_args_write.add(data)

        # Kernel SDFG is complete at this point
        if self.validate:
            kernel_sdfg.validate()

        # Filling launch state with nested SDFG, map and access nodes
        map_entry, map_exit = launch_state.add_map(
            '{}kernel_launch_map'.format(
                self.kernel_prefix + '_' if self.kernel_prefix != '' else ''),
            dict(ignore='0'),
            schedule=ScheduleType.GPU_Persistent,
        )

        nested_sdfg = launch_state.add_nested_sdfg(
            kernel_sdfg,
            sdfg,
            kernel_args_read,
            kernel_args_write,
        )

        # Create and connect read only data access nodes
        for arg in kernel_args_read:
            read_node = launch_state.add_read(arg)
            launch_state.add_memlet_path(read_node,
                                         map_entry,
                                         nested_sdfg,
                                         dst_conn=arg,
                                         memlet=Memlet.from_array(
                                             arg, sdfg.arrays[arg]))

        # Create and connect writable data access nodes
        for arg in kernel_args_write:
            write_node = launch_state.add_write(arg)
            launch_state.add_memlet_path(nested_sdfg,
                                         map_exit,
                                         write_node,
                                         src_conn=arg,
                                         memlet=Memlet.from_array(
                                             arg, sdfg.arrays[arg]))

        # Transformation is done
        if self.validate:
            sdfg.validate()

    @staticmethod
    def is_gpu_state(sdfg: SDFG, state: SDFGState) -> bool:

        # Valid storage types
        gpu_accessible = [
            StorageType.GPU_Global,
            StorageType.GPU_Shared,
            StorageType.CPU_Pinned,
            StorageType.Register,
        ]

        for node in state.data_nodes():
            desc = node.desc(sdfg)
            if (type(desc) in [dace.data.Array, dace.data.Stream]
                    and desc.storage not in gpu_accessible):
                return False

        gpu_fused_schedules = [
            ScheduleType.Default,
            ScheduleType.Sequential,
            ScheduleType.GPU_Device,
            ScheduleType.GPU_ThreadBlock,
            ScheduleType.GPU_ThreadBlock_Dynamic,
        ]

        for schedule in [
                n.map.schedule for n in state.nodes()
                if isinstance(n, nodes.MapEntry)
        ]:
            if schedule not in gpu_fused_schedules:
                return False

        return True

    @staticmethod
    def get_entry_states(sdfg: SDFG, subgraph):
        entry_states_in = set()
        entry_states_out = set()

        for state in subgraph:
            inner_predecessors = set(subgraph.predecessors(state))
            global_predecessors = set(sdfg.predecessors(state))
            outer_predecessors = global_predecessors - inner_predecessors
            if len(outer_predecessors) > 0:
                entry_states_in.add(state)
                entry_states_out |= outer_predecessors

        return entry_states_in, entry_states_out

    @staticmethod
    def get_exit_states(sdfg: SDFG, subgraph):
        exit_states_in = set()
        exit_states_out = set()

        for state in subgraph:
            inner_successors = set(subgraph.successors(state))
            global_successors = set(sdfg.successors(state))
            outer_successors = global_successors - inner_successors
            if len(outer_successors) > 0:
                exit_states_in.add(state)
                exit_states_out |= outer_successors

        return exit_states_in, exit_states_out
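
A usage sketch for the fusion (hedged: `sdfg` is assumed to be an SDFG already transformed for the GPU, and selecting kernel states by label prefix is purely illustrative).

from dace.sdfg.graph import SubgraphView

kernel_states = [s for s in sdfg.nodes() if s.label.startswith('compute')]
subgraph = SubgraphView(sdfg, kernel_states)

if GPUPersistentKernel.can_be_applied(sdfg, subgraph):
    fusion = GPUPersistentKernel(subgraph)
    fusion.kernel_prefix = 'persistent'  # Kernel named `persistent_kernel`
    fusion.apply(sdfg)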
Example 14
class SubgraphTransformation(object):
    """
    Base class for transformations that apply on arbitrary subgraphs, rather than
    matching a specific pattern. Subclasses need to implement the `match` and `apply`
    operations.
    """

    sdfg_id = Property(dtype=int, desc='ID of SDFG to transform')
    state_id = Property(
        dtype=int,
        desc='ID of state to transform subgraph within, or -1 to transform the '
        'SDFG')
    subgraph = SetProperty(element_type=int,
                           desc='Subgraph in transformation instance')

    def __init__(self,
                 subgraph: Union[Set[int], SubgraphView],
                 sdfg_id: int = None,
                 state_id: int = None):
        if (not isinstance(subgraph, (SubgraphView, SDFG, SDFGState))
                and (sdfg_id is None or state_id is None)):
            raise TypeError(
                'Subgraph transformation either expects a SubgraphView or a '
                'set of node IDs, SDFG ID and state ID (or -1).')

        # An entire graph is given as a subgraph
        if isinstance(subgraph, (SDFG, SDFGState)):
            subgraph = SubgraphView(subgraph, subgraph.nodes())

        if isinstance(subgraph, SubgraphView):
            self.subgraph = set(
                subgraph.graph.node_id(n) for n in subgraph.nodes())

            if isinstance(subgraph.graph, SDFGState):
                sdfg = subgraph.graph.parent
                self.sdfg_id = sdfg.sdfg_id
                self.state_id = sdfg.node_id(subgraph.graph)
            elif isinstance(subgraph.graph, SDFG):
                self.sdfg_id = subgraph.graph.sdfg_id
                self.state_id = -1
            else:
                raise TypeError('Unrecognized graph type "%s"' %
                                type(subgraph.graph).__name__)
        else:
            self.subgraph = subgraph
            self.sdfg_id = sdfg_id
            self.state_id = state_id

    def subgraph_view(self, sdfg: SDFG) -> SubgraphView:
        graph = sdfg.sdfg_list[self.sdfg_id]
        if self.state_id != -1:
            graph = graph.node(self.state_id)
        return SubgraphView(graph, [graph.node(idx) for idx in self.subgraph])

    @staticmethod
    def match(sdfg: SDFG, subgraph: SubgraphView) -> bool:
        """
        Tries to match the transformation on a given subgraph, returning
        True if this transformation can be applied.
        :param sdfg: The SDFG that includes the subgraph.
        :param subgraph: The SDFG or state subgraph to try to apply the 
                         transformation on.
        :return: True if the subgraph can be transformed, or False otherwise.
        """
        pass

    def apply(self, sdfg: SDFG):
        """
        Applies the transformation on the given subgraph.
        :param sdfg: The SDFG that includes the subgraph.
        """
        pass

    def to_json(self, parent=None):
        props = dace.serialize.all_properties_to_json(self)
        return {
            'type': 'SubgraphTransformation',
            'transformation': type(self).__name__,
            **props
        }

    @staticmethod
    def from_json(json_obj, context=None):
        xform = next(ext for ext in SubgraphTransformation.extensions().keys()
                     if ext.__name__ == json_obj['transformation'])

        # Reconstruct transformation
        ret = xform(json_obj['subgraph'], json_obj['sdfg_id'],
                    json_obj['state_id'])
        context = context or {}
        context['transformation'] = ret
        dace.serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'transformation', 'type'})
        return ret
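
A minimal subclass sketch against this interface (hedged: `CountNodes` is an illustrative no-op, and `sdfg`/`state` are placeholders for an existing SDFG and one of its states).

class CountNodes(SubgraphTransformation):
    """Illustrative subclass: reports the size of the matched subgraph."""
    @staticmethod
    def match(sdfg, subgraph):
        return len(subgraph.nodes()) > 0

    def apply(self, sdfg):
        subgraph = self.subgraph_view(sdfg)
        print('Subgraph has {} nodes'.format(len(subgraph.nodes())))

xform = CountNodes(state)  # Passing a state selects the whole state
if CountNodes.match(sdfg, xform.subgraph_view(sdfg)):
    xform.apply(sdfg)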
Example 15
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty()
    shape = ShapeProperty()
    transient = Property(dtype=bool)
    storage = Property(dtype=dace.types.StorageType,
                       desc="Storage location",
                       enum=dace.types.StorageType,
                       default=dace.types.StorageType.Default,
                       from_string=lambda x: types.StorageType[x])
    location = Property(
        dtype=str,  # Dict[str, symbolic]
        desc='Full storage location identifier (e.g., rank, GPU ID)',
        default='')
    toplevel = Property(dtype=bool,
                        desc="Allocate array outside of state",
                        default=False)
    debuginfo = DebugInfoProperty()

    def __init__(self, dtype, shape, transient, storage, location, toplevel,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location
        self.toplevel = toplevel
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def signature(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'
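
A sketch of a concrete descriptor built on this base class (hedged: this loosely mirrors what a scalar descriptor does in the library, but is illustrative rather than the actual dace.data.Scalar implementation).

class MyScalar(Data):
    """Illustrative data descriptor for a single value."""
    def __init__(self, dtype):
        super().__init__(dtype=dtype, shape=[1], transient=False,
                         storage=dace.types.StorageType.Default,
                         location='', toplevel=False, debuginfo=None)

    def is_equivalent(self, other):
        return isinstance(other, MyScalar) and self.dtype == other.dtype

    def signature(self, with_types=True, for_call=False, name=None):
        return '{} {}'.format(self.dtype.ctype, name) if with_types else name

    def __repr__(self):
        return 'MyScalar ({})'.format(self.dtype)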
Example 16
class GPUTransformLocalStorage(transformation.Transformation):
    """Implements the GPUTransformLocalStorage transformation.

        Similar to GPUTransformMap, but takes multiple maps leading from the
        same data node into account, creating a local storage for each range.

        @see: GPUTransformMap
    """

    _arrays_removed = 0
    _maps_transformed = 0

    fullcopy = Property(desc="Copy whole arrays rather than used subset",
                        dtype=bool,
                        default=False)

    nested_seq = Property(
        desc="Makes nested code semantically-equivalent to single-core code,"
        "transforming nested maps and memory into sequential and "
        "local memory respectively.",
        dtype=bool,
        default=True,
    )

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    import dace.libraries.standard as stdlib  # Avoid import loop
    _reduce = stdlib.Reduce("lambda: None", None)

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(GPUTransformLocalStorage._map_entry),
            sdutil.node_path_graph(GPUTransformLocalStorage._reduce),
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        if expr_index == 0:
            map_entry = graph.nodes()[candidate[
                GPUTransformLocalStorage._map_entry]]
            candidate_map = map_entry.map

            # Disallow GPUTransform on nested maps in strict mode
            if strict:
                if graph.entry_node(map_entry) is not None:
                    return False

            # Map schedules that are disallowed to transform to GPUs
            if (candidate_map.schedule == dtypes.ScheduleType.MPI
                    or candidate_map.schedule == dtypes.ScheduleType.GPU_Device
                    or candidate_map.schedule
                    == dtypes.ScheduleType.GPU_ThreadBlock or
                    candidate_map.schedule == dtypes.ScheduleType.Sequential):
                return False

            # Dynamic map ranges cannot become kernels
            if sd.has_dynamic_map_inputs(graph, map_entry):
                return False

            # Recursively check parent for GPU schedules
            sdict = graph.scope_dict()
            current_node = map_entry
            while current_node is not None:
                if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device
                        or current_node.map.schedule
                        == dtypes.ScheduleType.GPU_ThreadBlock):
                    return False
                current_node = sdict[current_node]

            # Ensure that map does not include internal arrays that are
            # allocated on non-default space
            subgraph = graph.scope_subgraph(map_entry)
            for node in subgraph.nodes():
                if (isinstance(node, nodes.AccessNode) and
                        node.desc(sdfg).storage != dtypes.StorageType.Default
                        and node.desc(sdfg).storage !=
                        dtypes.StorageType.Register):
                    return False

            # If one of the outputs is a stream, do not match
            map_exit = graph.exit_node(map_entry)
            for edge in graph.out_edges(map_exit):
                dst = graph.memlet_path(edge)[-1].dst
                if (isinstance(dst, nodes.AccessNode)
                        and isinstance(sdfg.arrays[dst.data], data.Stream)):
                    return False

            return True
        elif expr_index == 1:
            reduce = graph.nodes()[candidate[GPUTransformLocalStorage._reduce]]

            # Recursively check parent for GPU schedules
            sdict = graph.scope_dict()
            current_node = sdict[reduce]
            while current_node is not None:
                if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device
                        or current_node.map.schedule
                        == dtypes.ScheduleType.GPU_ThreadBlock):
                    return False
                current_node = sdict[current_node]

            return True

    @staticmethod
    def match_to_str(graph, candidate):
        if GPUTransformLocalStorage._reduce in candidate:
            return str(
                graph.nodes()[candidate[GPUTransformLocalStorage._reduce]])
        else:
            map_entry = graph.nodes()[candidate[
                GPUTransformLocalStorage._map_entry]]
            return str(map_entry)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        if self.expr_index == 0:
            cnode: nodes.MapEntry = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._map_entry]]
            # Change schedule
            cnode.schedule = dtypes.ScheduleType.GPU_Device
            exit_node = graph.exit_node(cnode)
        else:
            cnode: nodes.LibraryNode = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._reduce]]
            # Change schedule
            cnode.schedule = dtypes.ScheduleType.GPU_Default
            exit_node = cnode

        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._maps_transformed += 1
        # If nested graph is designated as sequential, transform schedules and
        # storage from Default to Sequential/Register
        if self.nested_seq and self.expr_index == 0:
            for node in graph.scope_subgraph(cnode).nodes():
                if isinstance(node, nodes.AccessNode):
                    arr = node.desc(sdfg)
                    if arr.storage == dtypes.StorageType.Default:
                        arr.storage = dtypes.StorageType.Register
                elif isinstance(node, nodes.MapEntry):
                    if node.map.schedule == dtypes.ScheduleType.Default:
                        node.map.schedule = dtypes.ScheduleType.Sequential

        gpu_storage_types = [
            dtypes.StorageType.GPU_Global,
            dtypes.StorageType.GPU_Shared,
        ]

        #######################################################
        # Add GPU copies of CPU arrays (i.e., not already on GPU)

        # First, understand which arrays to clone
        all_out_edges = []
        all_out_edges.extend(list(graph.out_edges(exit_node)))
        in_arrays_to_clone = set()
        out_arrays_to_clone = set()
        for e in graph.in_edges(cnode):
            data_node = sd.find_input_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                in_arrays_to_clone.add((data_node, e.data))
        for e in all_out_edges:
            data_node = sd.find_output_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                out_arrays_to_clone.add((data_node, e.data))

        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._arrays_removed += len(
                in_arrays_to_clone) + len(out_arrays_to_clone)

        # Second, create a GPU clone of each array
        # TODO: Overapproximate union of memlets
        cloned_arrays = {}
        in_cloned_arraynodes = {}
        out_cloned_arraynodes = {}
        for array_node, memlet in in_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except (TypeError, ValueError):
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except (TypeError, ValueError):
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)

            in_cloned_arraynodes[array_node.data] = cloned_node
        for array_node, memlet in out_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except (TypeError, ValueError):
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except (TypeError, ValueError):
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)
            cloned_node.setzero = True

            out_cloned_arraynodes[array_node.data] = cloned_node

        # Third, connect the cloned arrays to the originals
        for array_name, node in in_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in graph.in_edges(cnode):
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   newmemlet)

                    for e in graph.bfs_edges(edge.dst, reverse=False):
                        parent, _, _child, _, memlet = e
                        if parent != edge.dst and not in_scope(
                                graph, parent, edge.dst):
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[-1].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.ExitNode, forward=True):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   edge.data)
                    graph.remove_edge(edge)

        for array_name, node in out_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in all_out_edges:
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   newmemlet)

                    end_node = graph.entry_node(edge.src)
                    for e in graph.bfs_edges(edge.src, reverse=True):
                        parent, _, _child, _, memlet = e
                        if parent == end_node:
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[0].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.EntryNode,
                                       forward=False):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    edge.data.wcr = None
                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   edge.data)
                    graph.remove_edge(edge)

        # Fourth, replace memlet arrays as necessary
        if self.expr_index == 0:
            scope_subgraph = graph.scope_subgraph(cnode)
            for edge in scope_subgraph.edges():
                if edge.data.data is not None and edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]
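
A minimal usage sketch, assuming an existing dace.SDFG named `sdfg` that
contains a CPU-scheduled map (the import path is also an assumption):

from dace.transformation.dataflow import GPUTransformLocalStorage

# Apply the transformation wherever it matches; `options` sets the
# transformation properties declared above
applied = sdfg.apply_transformations(GPUTransformLocalStorage,
                                     options={'fullcopy': False})
print('Applied %d transformation(s)' % applied)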
Example no. 17
class Stream(Data):
    """ Stream (or stream array) data descriptor. """

    # Properties
    strides = Property(dtype=list)
    offset = Property(dtype=list)
    buffer_size = Property(dtype=int, desc="Size of internal buffer.")
    veclen = Property(dtype=int,
                      desc="Vector length. Memlets must adhere to this.")

    def __init__(self,
                 dtype,
                 veclen,
                 buffer_size,
                 shape=None,
                 transient=False,
                 storage=dace.types.StorageType.Default,
                 location='',
                 strides=None,
                 offset=None,
                 toplevel=False,
                 debuginfo=None):

        if shape is None:
            shape = (1, )

        self.veclen = veclen
        self.buffer_size = buffer_size

        if strides is not None:
            if len(strides) != len(shape):
                raise TypeError('Strides must be the same size as shape')
            self.strides = cp.copy(strides)
        else:
            self.strides = cp.copy(list(shape))

        if offset is not None:
            if len(offset) != len(shape):
                raise TypeError('Offset must be the same size as shape')
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        super(Stream, self).__init__(dtype, shape, transient, storage,
                                     location, toplevel, debuginfo)

    def __repr__(self):
        return 'Stream (dtype=%s, shape=%s)' % (self.dtype, self.shape)

    def clone(self):
        return Stream(self.dtype, self.veclen, self.buffer_size, self.shape,
                      self.transient, self.storage, self.location,
                      self.strides, self.offset, self.toplevel, self.debuginfo)

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Stream):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # If both are symbols, ensure equality
            if symbolic.issymbolic(dim) and symbolic.issymbolic(otherdim):
                if dim != otherdim:
                    return False

            # If one is a symbol and the other is a constant
            # make sure they are equivalent
            elif symbolic.issymbolic(otherdim):
                if symbolic.eval(otherdim) != dim:
                    return False
            elif symbolic.issymbolic(dim):
                if symbolic.eval(dim) != otherdim:
                    return False
            else:
                # Any other case (constant vs. constant), check for equality
                if otherdim != dim:
                    return False
        return True

    def signature(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call:
            return name
        if self.storage in [
                dace.types.StorageType.GPU_Global,
                dace.types.StorageType.GPU_Shared,
                dace.types.StorageType.GPU_Stack
        ]:
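            # The second template argument is 'true' iff the buffer size is a
            # power of two (i.e., its base-2 logarithm is an integer)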
            return 'dace::GPUStream<%s, %s> %s' % (str(
                self.dtype.ctype), 'true' if sp.log(
                    self.buffer_size, 2).is_Integer else 'false', name)

        return 'dace::Stream<%s> %s' % (str(self.dtype.ctype), name)

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]

    def size_string(self):
        return (" * ".join([
            cppunparse.pyexpr2cpp(dace.symbolic.symstr(s))
            for s in self.strides
        ]))

    def is_stream_array(self):
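        # The product of strides equals the total number of streams;
        # anything other than 1 indicates an array of streams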
        return functools.reduce(lambda a, b: a * b, self.strides) != 1

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sympy.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sympy.Symbol(str(s), **s.assumptions0)
                else:
                    s = sympy.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sympy.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sympy.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sympy.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True
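
A short construction sketch following the signature above (values are
illustrative, not prescriptive):

import dace

# Two scalar streams of 32-bit floats with an 8-element internal buffer
s1 = Stream(dace.float32, 1, 8)
s2 = Stream(dace.float32, 1, 8)
assert s1.is_equivalent(s2)      # same dtype and shape
assert not s1.is_stream_array()  # default shape (1,) is a single stream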
Example no. 18
class MapReduceFusion(pm.Transformation):
    """ Implements the map-reduce-fusion transformation.
        Fuses a map with an immediately following reduction, where the array
        between the map and the reduction is not used anywhere else.
    """

    no_init = Property(
        dtype=bool,
        default=False,
        desc='If enabled, does not create initialization states '
        'for reduce nodes with identity')

    _tasklet = nodes.Tasklet('_')
    _tmap_exit = nodes.MapExit(nodes.Map("", [], []))
    _in_array = nodes.AccessNode('_')

    import dace.libraries.standard as stdlib  # Avoid import loop
    _reduce = stdlib.Reduce()

    _out_array = nodes.AccessNode('_')

    @staticmethod
    def expressions():
        return [
            sdutil.node_path_graph(MapReduceFusion._tasklet,
                                   MapReduceFusion._tmap_exit,
                                   MapReduceFusion._in_array,
                                   MapReduceFusion._reduce,
                                   MapReduceFusion._out_array)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        tmap_exit = graph.nodes()[candidate[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[candidate[MapReduceFusion._in_array]]
        reduce_node = graph.nodes()[candidate[MapReduceFusion._reduce]]
        tasklet = graph.nodes()[candidate[MapReduceFusion._tasklet]]

        # Make sure that the array is only accessed by the map and the reduce
        if any([
                src != tmap_exit
                for src, _, _, _, memlet in graph.in_edges(in_array)
        ]):
            return False
        if any([
                dest != reduce_node
                for _, _, dest, _, memlet in graph.out_edges(in_array)
        ]):
            return False

        tmem = next(e for e in graph.edges_between(tasklet, tmap_exit)
                    if e.data.data == in_array.data).data

        # (strict) Make sure that the transient is not accessed anywhere else
        # in this state or other states
        if strict and (len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == in_array.data
        ]) > 1 or in_array.data in sdfg.shared_transients()):
            return False

        # If memlet already has WCR and it is different from reduce node,
        # do not match
        if tmem.wcr is not None and tmem.wcr != reduce_node.wcr:
            return False

        # Verify that reduction ranges match tasklet map
        tout_memlet = graph.in_edges(in_array)[0].data
        rin_memlet = graph.out_edges(in_array)[0].data
        if tout_memlet.subset != rin_memlet.subset:
            return False

        return True

    @staticmethod
    def match_to_str(graph, candidate):
        tasklet = candidate[MapReduceFusion._tasklet]
        map_exit = candidate[MapReduceFusion._tmap_exit]
        reduce = candidate[MapReduceFusion._reduce]

        return ' -> '.join(str(node) for node in [tasklet, map_exit, reduce])

    def apply(self, sdfg: SDFG):
        graph = sdfg.nodes()[self.state_id]
        tmap_exit = graph.nodes()[self.subgraph[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[self.subgraph[MapReduceFusion._in_array]]
        reduce_node = graph.nodes()[self.subgraph[MapReduceFusion._reduce]]
        out_array = graph.nodes()[self.subgraph[MapReduceFusion._out_array]]

        # Set nodes to remove according to the expression index
        nodes_to_remove = [in_array]
        nodes_to_remove.append(reduce_node)

        memlet_edge = None
        for edge in graph.in_edges(tmap_exit):
            if edge.data.data == in_array.data:
                memlet_edge = edge
                break
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        # Find which indices should be removed from new memlet
        input_edge = graph.in_edges(reduce_node)[0]
        axes = reduce_node.axes or list(range(len(input_edge.data.subset)))
        array_edge = graph.out_edges(reduce_node)[0]

        # Delete relevant edges and nodes
        graph.remove_nodes_from(nodes_to_remove)

        # Filter out reduced dimensions from subset
        filtered_subset = [
            dim for i, dim in enumerate(memlet_edge.data.subset)
            if i not in axes
        ]
        if len(filtered_subset) == 0:  # Output is a scalar
            filtered_subset = [(0, 0, 1)]

        # Modify edge from tasklet to map exit
        memlet_edge.data.data = out_array.data
        memlet_edge.data.wcr = reduce_node.wcr
        memlet_edge.data.subset = type(
            memlet_edge.data.subset)(filtered_subset)

        # Add edge from map exit to output array
        graph.add_edge(
            memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:], array_edge.dst,
            array_edge.dst_conn,
            Memlet(array_edge.data.data, array_edge.data.num_accesses,
                   array_edge.data.subset, array_edge.data.veclen,
                   reduce_node.wcr))

        # Add initialization state as necessary
        if reduce_node.identity is not None:
            init_state = sdfg.add_state_before(graph)
            init_state.add_mapped_tasklet(
                'freduce_init',
                [('o%d' % i, '%s:%s:%s' % (r[0], r[1] + 1, r[2]))
                 for i, r in enumerate(array_edge.data.subset)], {},
                'out = %s' % reduce_node.identity, {
                    'out':
                    Memlet.simple(
                        array_edge.data.data, ','.join([
                            'o%d' % i
                            for i in range(len(array_edge.data.subset))
                        ]))
                },
                external_edges=True)
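
A hedged usage sketch for this transformation; `sdfg` stands for an
existing SDFG containing the tasklet-map-exit-transient-reduce chain
described above, and the import path is an assumption:

from dace.transformation.dataflow import MapReduceFusion

# Fuse map+reduce patterns repeatedly until no matches remain; `no_init`
# skips the initialization state for reduce nodes with an identity value
sdfg.apply_transformations_repeated(MapReduceFusion,
                                    options={'no_init': True})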
Example no. 19
class Memlet(object):
    """ Data movement object. Represents the data, the subset moved, and the
        manner it is reindexed (`other_subset`) into the destination.
        If there are multiple conflicting writes, this object also specifies
        how they are resolved with a lambda function.
    """

    # Properties
    veclen = Property(dtype=int, desc="Vector length")
    num_accesses = SymbolicProperty(default=0)
    subset = SubsetProperty(default=subsets.Range([]))
    other_subset = SubsetProperty(allow_none=True)
    data = DataProperty()
    debuginfo = DebugInfoProperty()
    wcr = LambdaProperty(allow_none=True)
    wcr_identity = Property(dtype=object, default=None, allow_none=True)
    wcr_conflict = Property(dtype=bool, default=True)
    allow_oob = Property(dtype=bool,
                         default=False,
                         desc='Bypass out-of-bounds validation')

    def __init__(self,
                 data,
                 num_accesses,
                 subset,
                 vector_length,
                 wcr=None,
                 wcr_identity=None,
                 other_subset=None,
                 debuginfo=None,
                 wcr_conflict=True):
        """ Constructs a Memlet.
            :param data: The data object or name to access. B{Note:} this
                         parameter will soon be deprecated.
            @type data: Either a string of the data descriptor name or an
                        AccessNode.
            :param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 `dace.dtypes.DYNAMIC` (-1),
                                 designates that the number of accesses is
                                 unknown at compile time.
            :param subset: The subset of `data` that is going to be accessed.
            :param vector_length: The length of a single unit of access to
                                  the data (used for vectorization
                                  optimizations).
            :param wcr: A lambda function specifying how write-conflicts
                        are resolved. The lambda function receives two
                        elements, the `current` value and the `new` value,
                        and returns the value after resolution. For example,
                        summation is `lambda cur, new: cur + new`.
            :param wcr_identity: Identity value used for the first write
                                 conflict. B{Note:} this parameter will soon
                                 be deprecated.
            :param other_subset: The reindexing of `subset` on the other
                                 connected data.
            :param debuginfo: Source-code information (e.g., line, file)
                              used for debugging.
            :param wcr_conflict: If False, forces non-locked conflict
                                 resolution when generating code. The default
                                 is to let the code generator infer this
                                 information from the SDFG.
        """

        # Properties
        self.num_accesses = num_accesses  # type: sympy.expr.Expr
        self.subset = subset  # type: subsets.Subset
        self.veclen = vector_length  # type: int
        if hasattr(data, 'data'):
            data = data.data
        self.data = data  # type: str

        # Annotates memlet with _how_ writing is performed in case of conflict
        self.wcr = wcr
        self.wcr_identity = wcr_identity
        self.wcr_conflict = wcr_conflict

        # The subset of the other endpoint we are copying from/to (note:
        # carries the dimensionality of the other endpoint too!)
        self.other_subset = other_subset

        self.debuginfo = debuginfo

    def to_json(self, parent_graph=None):
        attrs = dace.serialize.all_properties_to_json(self)

        retdict = {"type": "Memlet", "label": str(self), "attributes": attrs}

        return retdict

    @staticmethod
    def from_json(json_obj, context=None):
        if json_obj['type'] != "Memlet":
            raise TypeError("Invalid data type")

        # Create dummy object
        ret = Memlet("", dace.dtypes.DYNAMIC, None, 1)
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)

        return ret

    @staticmethod
    def simple(data,
               subset_str,
               veclen=1,
               wcr_str=None,
               wcr_identity=None,
               other_subset_str=None,
               wcr_conflict=True,
               num_accesses=None,
               debuginfo=None):
        """ Constructs a Memlet from string-based expressions.
            :param data: The data object or name to access. B{Note:} this
                         parameter will soon be deprecated.
            @type data: Either a string of the data descriptor name or an
                        AccessNode.
            :param subset_str: The subset of `data` that is going to
                               be accessed in string format. Example: '0:N'.
            :param veclen: The length of a single unit of access to
                           the data (used for vectorization optimizations).
            :param wcr_str: A lambda function (as a string) specifying
                            how write-conflicts are resolved. The lambda
                            function receives two elements, the `current`
                            value and the `new` value, and returns the
                            value after resolution. For example, summation
                            is `'lambda cur, new: cur + new'`.
            :param wcr_identity: Identity value used for the first write
                                 conflict. B{Note:} this parameter will soon
                                 be deprecated.
            :param other_subset_str: The reindexing of `subset` on the other
                                     connected data (as a string).
            :param wcr_conflict: If False, forces non-locked conflict
                                 resolution when generating code. The default
                                 is to let the code generator infer this
                                 information from the SDFG.
            :param num_accesses: The number of times that the moved data
                                 will be subsequently accessed. If
                                 `dace.dtypes.DYNAMIC` (-1),
                                 designates that the number of accesses is
                                 unknown at compile time.
            :param debuginfo: Source-code information (e.g., line, file)
                              used for debugging.

        """
        subset = SubsetProperty.from_string(subset_str)
        if num_accesses is not None:
            na = num_accesses
        else:
            na = subset.num_elements()

        if wcr_str is not None:
            wcr = LambdaProperty.from_string(wcr_str)
        else:
            wcr = None

        if other_subset_str is not None:
            other_subset = SubsetProperty.from_string(other_subset_str)
        else:
            other_subset = None

        # If it is an access node or another memlet
        if hasattr(data, 'data'):
            data = data.data

        return Memlet(data,
                      na,
                      subset,
                      veclen,
                      wcr=wcr,
                      wcr_identity=wcr_identity,
                      other_subset=other_subset,
                      wcr_conflict=wcr_conflict,
                      debuginfo=debuginfo)

    @staticmethod
    def from_array(dataname, datadesc):
        """ Constructs a Memlet that transfers an entire array's contents.
            :param dataname: The name of the data descriptor in the SDFG.
            :param datadesc: The data descriptor object.
            @type datadesc: Data.
        """
        range = subsets.Range.from_array(datadesc)
        return Memlet(dataname, range.num_elements(), range, 1)

    def __hash__(self):
        return hash((self.data, self.num_accesses, self.subset, self.veclen,
                     str(self.wcr), self.wcr_identity, self.other_subset))

    def __eq__(self, other):
        return all([
            self.data == other.data, self.num_accesses == other.num_accesses,
            self.subset == other.subset, self.veclen == other.veclen,
            self.wcr == other.wcr, self.wcr_identity == other.wcr_identity,
            self.other_subset == other.other_subset
        ])

    def num_elements(self):
        """ Returns the number of elements in the Memlet subset. """
        return self.subset.num_elements()

    def bounding_box_size(self):
        """ Returns a per-dimension upper bound on the maximum number of
            elements in each dimension.

            This bound will be tight in the case of Range.
        """
        return self.subset.bounding_box_size()

    def validate(self, sdfg, state):
        if self.data is not None and self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    def __label__(self, sdfg, state):
        """ Returns a string representation of the memlet for display in a
            graph.

            :param sdfg: The SDFG in which the memlet resides.
            :param state: An SDFGState object in which the memlet resides.
        """
        if self.data is None:
            return self._label(None)
        return self._label(sdfg.arrays[self.data].shape)

    def __str__(self):
        return self._label(None)

    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.num_accesses != num_elements:
            if self.num_accesses == -1:
                result += '(dyn) '
            else:
                result += '(%s) ' % SymbolicProperty.to_string(
                    self.num_accesses)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't draw array if we're accessing a single element and it's zero
                if all(s == 0 for s in self.subset.min_element()):
                    arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == dtypes.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s' % wcrstr
            if self.wcr_identity is not None:
                result += ', id: %s' % str(self.wcr_identity)
            result += ')'

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result

    def __repr__(self):
        return "Memlet (" + self.__str__() + ")"
Example no. 20
class Scalar(Data):
    """ Data descriptor of a scalar value. """

    allow_conflicts = Property(dtype=bool, default=False)

    def __init__(self,
                 dtype,
                 transient=False,
                 storage=dtypes.StorageType.Default,
                 allow_conflicts=False,
                 location=None,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 debuginfo=None):
        self.allow_conflicts = allow_conflicts
        shape = [1]
        super(Scalar, self).__init__(dtype, shape, transient, storage, location,
                                     lifetime, debuginfo)

    @staticmethod
    def from_json(json_obj, context=None):
        if json_obj['type'] != "Scalar":
            raise TypeError("Invalid data type")

        # Create dummy object
        ret = Scalar(dtypes.int8)
        serialize.set_properties_from_json(ret, json_obj, context=context)

        # Check validity now
        ret.validate()
        return ret

    def __repr__(self):
        return 'Scalar (dtype=%s)' % self.dtype

    def clone(self):
        return Scalar(self.dtype, self.transient, self.storage,
                      self.allow_conflicts, self.location, self.lifetime,
                      self.debuginfo)

    @property
    def strides(self):
        return [1]

    @property
    def total_size(self):
        return 1

    @property
    def offset(self):
        return [0]

    def is_equivalent(self, other):
        if not isinstance(other, Scalar):
            return False
        if self.dtype != other.dtype:
            return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call:
            return name
        return self.dtype.as_arg(name)

    def sizes(self):
        return None

    def covers_range(self, rng):
        if len(rng) != 1:
            return False

        rng = rng[0]

        try:
            if (rng[1] - rng[0]) > rng[2]:
                return False
        except TypeError:  # cannot determine truth value of Relational
            pass
            #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % ((rng[1] - rng[0]) > rng[2]),
            #      'If this expression is false, please refine symbol definitions in the program.')

        return True
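
A brief sketch of the scalar invariants above (using the same `dtypes`
module this example imports):

s = Scalar(dtypes.float64)
assert s.strides == [1] and s.offset == [0] and s.total_size == 1
assert s.is_equivalent(Scalar(dtypes.float64))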
Example no. 21
class Transformation(TransformationBase):
    """ Base class for pattern-matching transformations, as well as a static
        registry of transformations, where new transformations can be added in a
        decentralized manner.
        An instance of a Transformation represents a match of the transformation
        on an SDFG, complete with a subgraph candidate and properties.

        New transformations that extend this class must contain static
        `PatternNode` fields that represent the nodes in the pattern graph, and
        use them to implement at least three methods:
          * `expressions`: A method that returns a list of graph
                           patterns (SDFG or SDFGState objects) that match this
                           transformation.
          * `can_be_applied`: A method that, given a subgraph candidate,
                              checks for additional conditions whether it can
                              be transformed.
          * `apply`: A method that applies the transformation
                     on the given SDFG.

        For more information and optimization opportunities, see the respective
        methods' documentation.

        In order to be included in lists and applied through the
        `sdfg.apply_transformations` API, each transformation should be
        registered with ``Transformation.register`` (or, more commonly,
        the ``@dace.registry.autoregister_params`` class decorator) with two
        optional boolean keyword arguments: ``singlestate`` (default: False)
        and ``coarsening`` (default: False).
        If ``singlestate`` is True, the transformation is matched on subgraphs
        inside an SDFGState; otherwise, subgraphs of the SDFG state machine are
        matched.
        If ``coarsening`` is True, this transformation will be performed automatically
        as part of SDFG dataflow coarsening.
    """

    # Properties
    sdfg_id = Property(dtype=int, category="(Debug)")
    state_id = Property(dtype=int, category="(Debug)")
    _subgraph = DictProperty(key_type=int, value_type=int, category="(Debug)")
    expr_index = Property(dtype=int, category="(Debug)")

    def annotates_memlets(self) -> bool:
        """ Indicates whether the transformation annotates the edges it creates
            or modifies with the appropriate memlets. This determines
            whether to apply memlet propagation after the transformation.
        """
        return False

    def expressions(self) -> List[gr.SubgraphView]:
        """ Returns a list of Graph objects that will be matched in the
            subgraph isomorphism phase. Used as a pre-pass before calling
            `can_be_applied`.
            :see: Transformation.can_be_applied
        """
        raise NotImplementedError

    def can_be_applied(self,
                       graph: Union[SDFG, SDFGState],
                       candidate: Dict['PatternNode', int],
                       expr_index: int,
                       sdfg: SDFG,
                       permissive: bool = False) -> bool:
        """ Returns True if this transformation can be applied on the candidate
            matched subgraph.
            :param graph: SDFGState object if this Transformation is
                          single-state, or SDFG object otherwise.
            :param candidate: A mapping between node IDs returned from
                              `Transformation.expressions` and the nodes in
                              `graph`.
            :param expr_index: The list index from `Transformation.expressions`
                               that was matched.
            :param sdfg: If `graph` is an SDFGState, its parent SDFG. Otherwise
                         should be equal to `graph`.
            :param permissive: Whether transformation should run in permissive mode.
            :return: True if the transformation can be applied.
        """
        raise NotImplementedError

    def apply(self, sdfg: SDFG) -> Union[Any, None]:
        """
        Applies this transformation instance on the matched pattern graph.
        :param sdfg: The SDFG to apply the transformation to.
        :return: A transformation-defined return value, which could be used
                 to pass analysis data out, or nothing.
        """
        raise NotImplementedError

    def match_to_str(self, graph: Union[SDFG, SDFGState],
                     candidate: Dict['PatternNode', int]) -> str:
        """ Returns a string representation of the pattern match on the
            candidate subgraph. Used when identifying matches in the console
            UI.
        """
        return str(list(candidate.values()))

    def __init__(self,
                 sdfg_id: int,
                 state_id: int,
                 subgraph: Dict['PatternNode', int],
                 expr_index: int,
                 override: bool = False,
                 options: Optional[Dict[str, Any]] = None) -> None:
        """ Initializes an instance of Transformation match.
            :param sdfg_id: A unique ID of the SDFG.
            :param state_id: The node ID of the SDFG state, if applicable. If
                             transformation does not operate on a single state,
                             the value should be -1.
            :param subgraph: A mapping between node IDs returned from
                             `Transformation.expressions` and the nodes in
                             `graph`.
            :param expr_index: The list index from `Transformation.expressions`
                               that was matched.
            :param override: If True, accepts the subgraph dictionary as-is
                             (mostly for internal use).
            :param options: An optional dictionary of transformation properties
            :raise TypeError: When transformation is not subclass of
                              Transformation.
            :raise TypeError: When state_id is not instance of int.
            :raise TypeError: When subgraph is not a dict of
                              PatternNode : int.
        """

        self.sdfg_id = sdfg_id
        self.state_id = state_id
        if not override:
            expr = self.expressions()[expr_index]
            for value in subgraph.values():
                if not isinstance(value, int):
                    raise TypeError('All values of the subgraph dictionary '
                                    'must be instances of int.')
            self._subgraph = {expr.node_id(k): v for k, v in subgraph.items()}
        else:
            self._subgraph = {-1: -1}
        # Serializable subgraph with node IDs as keys
        self._subgraph_user = copy.copy(subgraph)
        self.expr_index = expr_index

        # Ease-of-use API: Set new pattern-nodes with information about this
        # instance.
        for pname, pval in self._get_pattern_nodes().items():
            # Create new pattern node from existing field
            new_pnode = PatternNode(
                pval.node if isinstance(pval, PatternNode) else type(pval))
            new_pnode.match_instance = self

            # Append existing values in subgraph dictionary
            if pval in self._subgraph_user:
                self._subgraph_user[new_pnode] = self._subgraph_user[pval]

            # Override static field with the new node in this instance only
            setattr(self, pname, new_pnode)

        # Set properties
        if options is not None:
            for optname, optval in options.items():
                setattr(self, optname, optval)

    @property
    def subgraph(self):
        return self._subgraph_user

    def apply_pattern(self,
                      sdfg: SDFG,
                      append: bool = True,
                      annotate: bool = True) -> Union[Any, None]:
        """
        Applies this transformation on the given SDFG, using the transformation
        instance to find the right SDFG object (based on SDFG ID), and applying
        memlet propagation as necessary.
        :param sdfg: The SDFG (or an SDFG in the same hierarchy) to apply the
                     transformation to.
        :param append: If True, appends the transformation to the SDFG
                       transformation history.
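        :param annotate: If True, runs memlet propagation after applying
                         the transformation, unless it annotates memlets.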
        :return: A transformation-defined return value, which could be used
                 to pass analysis data out, or nothing.
        """
        if append:
            sdfg.append_transformation(self)
        tsdfg: SDFG = sdfg.sdfg_list[self.sdfg_id]
        retval = self.apply(tsdfg)
        if annotate and not self.annotates_memlets():
            propagation.propagate_memlets_sdfg(tsdfg)
        return retval

    def __lt__(self, other: 'Transformation') -> bool:
        """
        Comparing two transformations by their class name and node IDs
        in match. Used for ordering transformations consistently.
        """
        if type(self) != type(other):
            return type(self).__name__ < type(other).__name__

        self_ids = iter(self.subgraph.values())
        other_ids = iter(other.subgraph.values())

        try:
            self_id = next(self_ids)
        except StopIteration:
            return True
        try:
            other_id = next(other_ids)
        except StopIteration:
            return False

        self_end = False

        while self_id is not None and other_id is not None:
            if self_id != other_id:
                return self_id < other_id
            try:
                self_id = next(self_ids)
            except StopIteration:
                self_end = True
            try:
                other_id = next(other_ids)
            except StopIteration:
                # Both iterators are exhausted (transformations are equal) or
                # other is a prefix of self; either way, self is not less
                return False
            if self_end:
                return True

    @classmethod
    def _get_pattern_nodes(cls) -> Dict[str, 'PatternNode']:
        """
        Returns a dictionary of pattern-matching node in this transformation
        subclass. Used internally for pattern-matching.
        :return: A dictionary mapping between pattern-node name and its type.
        """
        return {
            k: getattr(cls, k)
            for k in dir(cls)
            if isinstance(getattr(cls, k), PatternNode) or (k.startswith(
                '_') and isinstance(getattr(cls, k), (nd.Node, SDFGState)))
        }

    @classmethod
    def apply_to(cls,
                 sdfg: SDFG,
                 options: Optional[Dict[str, Any]] = None,
                 expr_index: int = 0,
                 verify: bool = True,
                 annotate: bool = True,
                 permissive: bool = False,
                 save: bool = True,
                 **where: Union[nd.Node, SDFGState]):
        """
        Applies this transformation to a given subgraph, defined by a set of
        nodes. Raises an error if arguments are invalid or transformation is
        not applicable.

        The subgraph is defined by the `where` dictionary, where each key is
        taken from the `PatternNode` fields of the transformation. For example,
        applying `MapCollapse` on two maps can be performed as follows:

        ```
        MapCollapse.apply_to(sdfg, outer_map_entry=map_a, inner_map_entry=map_b)
        ```

        :param sdfg: The SDFG to apply the transformation to.
        :param options: A set of parameters to use for applying the
                        transformation.
        :param expr_index: The pattern expression index to try to match with.
        :param verify: Check that `can_be_applied` returns True before applying.
        :param annotate: Run memlet propagation after application if necessary.
        :param permissive: Apply transformation in permissive mode.
        :param save: Save transformation as part of the SDFG file. Set to
                     False if composing transformations.
        :param where: A dictionary of node names (from the transformation) to
                      nodes in the SDFG or a single state.
        """
        if len(where) == 0:
            raise ValueError('At least one node is required')
        options = options or {}

        # Check that all keyword arguments are nodes and if interstate or not
        sample_node = next(iter(where.values()))

        if isinstance(sample_node, SDFGState):
            graph = sdfg
            state_id = -1
        elif isinstance(sample_node, nd.Node):
            graph = next(s for s in sdfg.nodes() if sample_node in s.nodes())
            state_id = sdfg.node_id(graph)
        else:
            raise TypeError('Invalid node type "%s"' %
                            type(sample_node).__name__)

        # Check that all nodes in the pattern are set
        required_nodes = cls.expressions()[expr_index].nodes()
        required_node_names = {
            pname: pval
            for pname, pval in cls._get_pattern_nodes().items()
            if pval in required_nodes
        }
        required = set(required_node_names.keys())
        intersection = required & set(where.keys())
        if len(required - intersection) > 0:
            raise ValueError('Missing nodes for transformation subgraph: %s' %
                             (required - intersection))

        # Construct subgraph and instantiate transformation
        subgraph = {
            required_node_names[k]: graph.node_id(where[k])
            for k in required
        }
        instance = cls(sdfg.sdfg_id, state_id, subgraph, expr_index)

        # Construct transformation parameters
        for optname, optval in options.items():
            if optname not in cls.__properties__:
                raise ValueError('Property "%s" not found in transformation' %
                                 optname)
            setattr(instance, optname, optval)

        if verify:
            if not instance.can_be_applied(
                    graph, subgraph, expr_index, sdfg, permissive=permissive):
                raise ValueError('Transformation cannot be applied on the '
                                 'given subgraph ("can_be_applied" failed)')

        # Apply to SDFG
        return instance.apply_pattern(sdfg, annotate=annotate, append=save)

    def __str__(self) -> str:
        return type(self).__name__

    def print_match(self, sdfg: SDFG) -> str:
        """ Returns a string representation of the pattern match on the
            given SDFG. Used for printing matches in the console UI.
        """
        if not isinstance(sdfg, SDFG):
            raise TypeError("Expected SDFG, got: {}".format(
                type(sdfg).__name__))
        if self.state_id == -1:
            graph = sdfg
        else:
            graph = sdfg.nodes()[self.state_id]
        string = type(self).__name__ + ' in '
        string += self.match_to_str(graph, self.subgraph)
        return string

    def to_json(self, parent=None) -> Dict[str, Any]:
        props = serialize.all_properties_to_json(self)
        return {
            'type': 'Transformation',
            'transformation': type(self).__name__,
            **props
        }

    @staticmethod
    def from_json(json_obj: Dict[str, Any],
                  context: Dict[str, Any] = None) -> 'Transformation':
        xform = next(ext for ext in Transformation.extensions().keys()
                     if ext.__name__ == json_obj['transformation'])

        # Recreate subgraph
        expr = xform.expressions()[json_obj['expr_index']]
        subgraph = {
            expr.node(int(k)): int(v)
            for k, v in json_obj['_subgraph'].items()
        }

        # Reconstruct transformation
        ret = xform(json_obj['sdfg_id'], json_obj['state_id'], subgraph,
                    json_obj['expr_index'])
        context = context or {}
        context['transformation'] = ret
        serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'transformation', 'type'})
        return ret
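
To make the usage above concrete, here is a minimal, hedged sketch of
`apply_to` (it assumes a DaCe installation where `MapCollapse` exposes the
`outer_map_entry`/`inner_map_entry` pattern nodes named in the docstring
above; the program and state lookup are illustrative only):

import dace
from dace.sdfg import nodes as nd
from dace.transformation.dataflow import MapCollapse

@dace.program
def add_one(A: dace.float64[64, 64]):
    for i in dace.map[0:64]:
        for j in dace.map[0:64]:
            A[i, j] = A[i, j] + 1

sdfg = add_one.to_sdfg()
# Assume the maps live in the first state of this simple program
state = sdfg.nodes()[0]
sdict = state.scope_dict()
entries = [n for n in state.nodes() if isinstance(n, nd.MapEntry)]
outer = next(n for n in entries if sdict[n] is None)   # top-level map
inner = next(n for n in entries if sdict[n] is outer)  # nested map
MapCollapse.apply_to(sdfg, outer_map_entry=outer, inner_map_entry=inner)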
Example 22
class Array(Data):
    """ Array/constant descriptor (dimensions, type and other properties). """

    # Properties
    allow_conflicts = Property(
        dtype=bool,
        default=False,
        desc='If enabled, allows more than one '
        'memlet to write to the same memory location without conflict '
        'resolution.')

    strides = ShapeProperty(
        # element_type=symbolic.pystr_to_symbolic,
        desc='For each dimension, the number of elements to '
        'skip in order to obtain the next element in '
        'that dimension.')

    total_size = SymbolicProperty(
        default=1,
        desc='The total allocated size of the array. Can be used for'
        ' padding.')

    offset = ListProperty(element_type=symbolic.pystr_to_symbolic,
                          desc='Initial offset to translate all indices by.')

    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in '
                         'the same function')

    alignment = Property(dtype=int,
                         default=0,
                         desc='Allocation alignment in bytes (0 uses '
                         'compiler-default)')

    def __init__(self,
                 dtype,
                 shape,
                 transient=False,
                 allow_conflicts=False,
                 storage=dtypes.StorageType.Default,
                 location=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 alignment=0,
                 debuginfo=None,
                 total_size=None):

        super(Array, self).__init__(dtype, shape, transient, storage, location,
                                    lifetime, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.may_alias = may_alias
        self.alignment = alignment

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = [_prod(shape[i + 1:]) for i in range(len(shape))]

        self.total_size = total_size or _prod(shape)

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return 'Array (dtype=%s, shape=%s)' % (self.dtype, self.shape)

    def clone(self):
        return Array(self.dtype, self.shape, self.transient,
                     self.allow_conflicts, self.storage, self.location,
                     self.strides, self.offset, self.may_alias, self.lifetime,
                     self.alignment, self.debuginfo, self.total_size)

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        # Take care of symbolic expressions
        attrs['strides'] = list(map(str, attrs['strides']))

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @staticmethod
    def from_json(json_obj, context=None):
        if json_obj['type'] != "Array":
            raise TypeError("Invalid data type")

        # Create dummy object
        ret = Array(dtypes.int8, ())
        serialize.set_properties_from_json(ret, json_obj, context=context)
        # TODO: This needs to be reworked (i.e. integrated into the list property)
        ret.strides = list(map(symbolic.pystr_to_symbolic, ret.strides))

        # Check validity now
        ret.validate()
        return ret

    def validate(self):
        super(Array, self).validate()
        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer '
                            'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sp.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sp.Symbol(str(s), **s.assumptions0)
                else:
                    s = sp.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sp.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sp.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sp.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Array):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # Any other case (constant vs. constant), check for equality
            if otherdim != dim:
                return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        arrname = name

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]

    @property
    def free_symbols(self):
        result = super().free_symbols
        for s in self.strides:
            if isinstance(s, sp.Expr):
                result |= set(s.free_symbols)
        if isinstance(self.total_size, sp.Expr):
            result |= set(self.total_size.free_symbols)
        for o in self.offset:
            if isinstance(o, sp.Expr):
                result |= set(o.free_symbols)

        return result
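
A short sketch of the default stride, size, and offset computation in the
constructor above (assumes DaCe is installed; the shape is illustrative):

import dace
from dace.data import Array

desc = Array(dace.float64, (4, 8, 16))
# Strides default to row-major products of the trailing dimensions
print(desc.strides)     # [128, 16, 1]
print(desc.total_size)  # 512
print(desc.offset)      # [0, 0, 0]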
Example 23
class OrthogonalTiling(pattern_matching.Transformation):
    """ Implements the orthogonal tiling transformation.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map.
    """

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    # Properties
    prefix = Property(dtype=str,
                      default="tile",
                      desc="Prefix for new iterators")
    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")
    divides_evenly = Property(dtype=bool,
                              default=False,
                              desc="Tile size divides dimension length evenly")

    @staticmethod
    def annotates_memlets():
        return False

    @staticmethod
    def expressions():
        return [nxutil.node_path_graph(OrthogonalTiling._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        # Tile map.
        target_dim, new_dim, new_map = self.__stripmine(
            sdfg, graph, self.subgraph)
        return new_map

    def __stripmine(self, sdfg, graph, candidate):
        # Retrieve map entry and exit nodes.
        map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]]
        map_exit = graph.exit_nodes(map_entry)[0]

        # Map subgraph
        map_subgraph = graph.scope_subgraph(map_entry)

        # Retrieve transformation properties.
        prefix = self.prefix
        tile_sizes = self.tile_sizes
        divides_evenly = self.divides_evenly

        new_param = []
        new_range = []

        for dim_idx in range(len(map_entry.map.params)):

            if dim_idx >= len(tile_sizes):
                tile_size = tile_sizes[-1]
            else:
                tile_size = tile_sizes[dim_idx]

            # Retrieve parameter and range of dimension to be strip-mined.
            target_dim = map_entry.map.params[dim_idx]
            td_from, td_to, td_step = map_entry.map.range[dim_idx]

            new_dim = prefix + '_' + target_dim

            # Basic values
            if divides_evenly:
                tile_num = '(%s + 1 - %s) / %s' % (symbolic.symstr(td_to),
                                                   symbolic.symstr(td_from),
                                                   str(tile_size))
            else:
                tile_num = 'int_ceil((%s + 1 - %s), %s)' % (symbolic.symstr(
                    td_to), symbolic.symstr(td_from), str(tile_size))

            # Outer map values (over all tiles)
            nd_from = 0
            nd_to = symbolic.pystr_to_symbolic(str(tile_num) + ' - 1')
            nd_step = 1

            # Inner map values (over one tile)
            td_from_new = dace.symbolic.pystr_to_symbolic(td_from)
            td_to_new_exact = symbolic.pystr_to_symbolic(
                'min(%s + 1 - %s * %s, %s + %s) - 1' %
                (symbolic.symstr(td_to), str(new_dim), str(tile_size),
                 td_from_new, str(tile_size)))
            td_to_new_approx = symbolic.pystr_to_symbolic(
                '%s + %s - 1' % (td_from_new, str(tile_size)))

            # Outer map (over all tiles)
            new_dim_range = (nd_from, nd_to, nd_step)
            new_param.append(new_dim)
            new_range.append(new_dim_range)

            # Inner map (over one tile)
            if divides_evenly:
                td_to_new = td_to_new_approx
            else:
                td_to_new = dace.symbolic.SymExpr(td_to_new_exact,
                                                  td_to_new_approx)
            map_entry.map.range[dim_idx] = (td_from_new, td_to_new, td_step)

            # Fix subgraph memlets
            target_dim = dace.symbolic.pystr_to_symbolic(target_dim)
            offset = dace.symbolic.pystr_to_symbolic('%s * %s' %
                                                     (new_dim, str(tile_size)))
            for _, _, _, _, memlet in map_subgraph.edges():
                old_subset = memlet.subset
                if isinstance(old_subset, dace.subsets.Indices):
                    new_indices = []
                    for idx in old_subset:
                        new_idx = idx.subs(target_dim, target_dim + offset)
                        new_indices.append(new_idx)
                    memlet.subset = dace.subsets.Indices(new_indices)
                elif isinstance(old_subset, dace.subsets.Range):
                    new_ranges = []
                    for i, old_range in enumerate(old_subset):
                        if len(old_range) == 3:
                            b, e, s = old_range
                            t = old_subset.tile_sizes[i]
                        else:
                            raise ValueError('Range %s is invalid.' %
                                             old_range)
                        new_b = b.subs(target_dim, target_dim + offset)
                        new_e = e.subs(target_dim, target_dim + offset)
                        new_s = s.subs(target_dim, target_dim + offset)
                        new_t = t.subs(target_dim, target_dim + offset)
                        new_ranges.append((new_b, new_e, new_s, new_t))
                    memlet.subset = dace.subsets.Range(new_ranges)
                else:
                    raise NotImplementedError

        new_map = nodes.Map(prefix + '_' + map_entry.map.label, new_param,
                            subsets.Range(new_range))
        new_map_entry = nodes.MapEntry(new_map)
        new_exit = nodes.MapExit(new_map)

        # Make internal map's schedule to "not parallel"
        map_entry.map._schedule = types.ScheduleType.Default

        # Redirect/create edges.
        new_in_edges = {}
        for _src, conn, _dest, _, memlet in graph.out_edges(map_entry):
            if not isinstance(sdfg.arrays[memlet.data], dace.data.Scalar):
                new_subset = copy.deepcopy(memlet.subset)
                # new_subset = calc_set_image(map_entry.map.params,
                #                             map_entry.map.range, memlet.subset,
                #                             cont_or_strided)
                if memlet.data in new_in_edges:
                    src, src_conn, dest, dest_conn, new_memlet, num = \
                        new_in_edges[memlet.data]
                    new_memlet.subset = calc_set_union(
                        new_memlet.data, sdfg.arrays[new_memlet.data],
                        new_memlet.subset, new_subset)
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges.update({
                        memlet.data:
                        (src, src_conn, dest, dest_conn, new_memlet,
                         min(num, int(conn[4:])))
                    })
                else:
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges.update({
                        memlet.data:
                        (new_map_entry, None, map_entry, None, new_memlet,
                         int(conn[4:]))
                    })
        nxutil.change_edge_dest(graph, map_entry, new_map_entry)

        new_out_edges = {}
        for _src, conn, _dest, _, memlet in graph.in_edges(map_exit):
            if not isinstance(sdfg.arrays[memlet.data], dace.data.Scalar):
                new_subset = memlet.subset
                # new_subset = calc_set_image(map_entry.map.params,
                #                             map_entry.map.range,
                #                             memlet.subset, cont_or_strided)
                if memlet.data in new_out_edges:
                    src, src_conn, dest, dest_conn, new_memlet, num = \
                        new_out_edges[memlet.data]
                    new_memlet.subset = calc_set_union(
                        new_memlet.data, sdfg.arrays[new_memlet.data],
                        new_memlet.subset, new_subset)
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges.update({
                        memlet.data:
                        (src, src_conn, dest, dest_conn, new_memlet,
                         min(num, int(conn[4:])))
                    })
                else:
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges.update({
                        memlet.data:
                        (map_exit, None, new_exit, None, new_memlet,
                         int(conn[4:]))
                    })
        nxutil.change_edge_src(graph, map_exit, new_exit)

        # Connector related work follows
        # 1. Dictionary 'old_connector_number': 'new_connector_number'
        # 2. New node in/out connectors
        # 3. New edges

        in_conn_nums = []
        for _, e in new_in_edges.items():
            _, _, _, _, _, num = e
            in_conn_nums.append(num)
        in_conn = {}
        for i, num in enumerate(in_conn_nums):
            in_conn.update({num: i + 1})

        entry_in_connectors = set()
        entry_out_connectors = set()
        for i in range(len(in_conn_nums)):
            entry_in_connectors.add('IN_' + str(i + 1))
            entry_out_connectors.add('OUT_' + str(i + 1))
        new_map_entry.in_connectors = entry_in_connectors
        new_map_entry.out_connectors = entry_out_connectors

        for _, e in new_in_edges.items():
            src, _, dst, _, memlet, num = e
            graph.add_edge(src, 'OUT_' + str(in_conn[num]), dst,
                           'IN_' + str(in_conn[num]), memlet)

        out_conn_nums = []
        for _, e in new_out_edges.items():
            _, _, dst, _, _, num = e
            if dst is not new_exit:
                continue
            out_conn_nums.append(num)
        out_conn = {}
        for i, num in enumerate(out_conn_nums):
            out_conn.update({num: i + 1})

        exit_in_connectors = set()
        exit_out_connectors = set()
        for i in range(len(out_conn_nums)):
            exit_in_connectors.add('IN_' + str(i + 1))
            exit_out_connectors.add('OUT_' + str(i + 1))
        new_exit.in_connectors = exit_in_connectors
        new_exit.out_connectors = exit_out_connectors

        for _, e in new_out_edges.items():
            src, _, dst, _, memlet, num = e
            graph.add_edge(src, 'OUT_' + str(out_conn[num]), dst,
                           'IN_' + str(out_conn[num]), memlet)

        # Return strip-mined dimension.
        return target_dim, new_dim, new_map

    @staticmethod
    def __modify_edges(sdfg, graph, candidate, target_dim, new_dim):
        map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]]

        processed = []
        for src, _dest, memlet, _scope in nxutil.traverse_sdfg_scope(
                graph, map_entry, True):
            if memlet in processed:
                continue
            processed.append(memlet)

            # Corner cases
            if isinstance(sdfg.arrays[memlet.data], dace.data.Stream):
                continue
            if memlet.wcr is not None:
                memlet.num_accesses = 1
                continue

            for i, dim in enumerate(memlet.subset):
                if isinstance(dim, tuple):
                    dim = tuple(
                        symbolic.pystr_to_symbolic(d).subs(
                            symbolic.pystr_to_symbolic(target_dim),
                            symbolic.pystr_to_symbolic('%s + %s' %
                                                       (str(new_dim),
                                                        str(target_dim))))
                        for d in dim)
                else:
                    dim = symbolic.pystr_to_symbolic(dim).subs(
                        symbolic.pystr_to_symbolic(target_dim),
                        symbolic.pystr_to_symbolic(
                            '%s + %s' % (str(new_dim), str(target_dim))))

                memlet.subset[i] = dim
        return
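
The range arithmetic that `__stripmine` performs per dimension can be
summarized by the following plain-Python sketch in absolute indices (the
transformation itself keeps the inner map's original start and instead
substitutes the `new_dim * tile_size` offset into the memlets; the function
name here is illustrative):

import math

def tiled_ranges(td_from, td_to, tile_size):
    """Outer (per-tile) and inner (within-tile) ranges for an inclusive
       original range [td_from, td_to]."""
    tile_num = math.ceil((td_to + 1 - td_from) / tile_size)
    outer = (0, tile_num - 1, 1)  # iterates over all tiles
    def inner(tile):
        start = td_from + tile * tile_size
        # The last tile is clipped unless tile_size divides evenly
        return (start, min(td_to, start + tile_size - 1), 1)
    return outer, inner

outer, inner = tiled_ranges(0, 99, 32)
print(outer)     # (0, 3, 1)
print(inner(3))  # (96, 99, 1) -- the clipped final tile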
Example 24
class Data(object):
    """ Data type descriptors that can be used as references to memory.
        Examples: Arrays, Streams, custom arrays (e.g., sparse matrices).
    """

    dtype = TypeClassProperty(default=dtypes.int32)
    shape = ShapeProperty(default=[])
    transient = Property(dtype=bool, default=False)
    storage = Property(dtype=dtypes.StorageType,
                       desc="Storage location",
                       choices=dtypes.StorageType,
                       default=dtypes.StorageType.Default,
                       from_string=lambda x: dtypes.StorageType[x])
    lifetime = Property(dtype=dtypes.AllocationLifetime,
                        desc='Data allocation span',
                        choices=dtypes.AllocationLifetime,
                        default=dtypes.AllocationLifetime.Scope,
                        from_string=lambda x: dtypes.AllocationLifetime[x])
    location = DictProperty(
        key_type=str,
        value_type=symbolic.pystr_to_symbolic,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    debuginfo = DebugInfoProperty(allow_none=True)

    def __init__(self, dtype, shape, transient, storage, location, lifetime,
                 debuginfo):
        self.dtype = dtype
        self.shape = shape
        self.transient = transient
        self.storage = storage
        self.location = location if location is not None else {}
        self.lifetime = lifetime
        self.debuginfo = debuginfo
        self._validate()

    def validate(self):
        """ Validate the correctness of this object.
            Raises an exception on error. """
        self._validate()

    # Validation of this class is in a separate function, so that this
    # class can call `_validate()` without calling the subclasses'
    # `validate` function.
    def _validate(self):
        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic)) for s in self.shape):
            raise TypeError('Shape must be a list or tuple of integer values '
                            'or symbols')
        return True

    def to_json(self):
        attrs = serialize.all_properties_to_json(self)

        retdict = {"type": type(self).__name__, "attributes": attrs}

        return retdict

    @property
    def toplevel(self):
        return self.lifetime is not dtypes.AllocationLifetime.Scope

    def copy(self):
        raise RuntimeError(
            'Data descriptors are unique and should not be copied')

    def is_equivalent(self, other):
        """ Check for equivalence (shape and type) of two data descriptors. """
        raise NotImplementedError

    def as_arg(self, with_types=True, for_call=False, name=None):
        """Returns a string for a C++ function signature (e.g., `int *A`). """
        raise NotImplementedError

    @property
    def free_symbols(self) -> Set[symbolic.SymbolicType]:
        """ Returns a set of undefined symbols in this data descriptor. """
        result = set()
        for s in self.shape:
            if isinstance(s, sp.Basic):
                result |= set(s.free_symbols)
        return result

    def __repr__(self):
        return 'Abstract Data Container, DO NOT USE'

    @property
    def veclen(self):
        return self.dtype.veclen if hasattr(self.dtype, "veclen") else 1
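
A small sketch of how symbolic shapes and allocation lifetimes surface
through this interface (assumes DaCe is installed; `Array` is the concrete
descriptor from the example above):

import dace
from dace import symbolic

N = symbolic.symbol('N')
desc = dace.data.Array(dace.float32, (N, N + 1),
                       lifetime=dace.dtypes.AllocationLifetime.Persistent)
print(desc.free_symbols)  # {N}, collected from shape, strides, and size
print(desc.toplevel)      # True: the allocation outlives its scope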
Example 25
class LibraryNode(CodeNode):

    name = Property(dtype=str, desc="Name of node")
    implementation = LibraryImplementationProperty(
        dtype=str,
        allow_none=True,
        desc=("Which implementation this library node will expand into."
              "Must match a key in the list of possible implementations."))
    schedule = Property(
        dtype=dtypes.ScheduleType,
        desc="If set, determines the default device mapping of "
        "the node upon expansion, if expanded to a nested SDFG.",
        choices=dtypes.ScheduleType,
        from_string=lambda x: dtypes.ScheduleType[x],
        default=dtypes.ScheduleType.Default)
    debuginfo = DebugInfoProperty()

    def __init__(self, name, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.name = name
        self.label = name

    # Overrides subclasses to return LibraryNode as their JSON type
    @property
    def __jsontype__(self):
        return 'LibraryNode'

    # Based on https://stackoverflow.com/a/2020083/6489142
    def _fullclassname(self):
        module = self.__class__.__module__
        if module is None or module == str.__class__.__module__:
            return self.__class__.__name__  # Avoid reporting __builtin__
        else:
            return module + '.' + self.__class__.__name__

    def to_json(self, parent):
        jsonobj = super().to_json(parent)
        jsonobj['classpath'] = self._fullclassname()
        return jsonobj

    @classmethod
    def from_json(cls, json_obj, context=None):
        if cls == LibraryNode:
            clazz = pydoc.locate(json_obj['classpath'])
            if clazz is None:
                raise TypeError('Unrecognized library node type "%s"' %
                                json_obj['classpath'])
            return clazz.from_json(json_obj, context)
        else:  # Subclasses are actual library nodes
            ret = cls(json_obj['attributes']['name'])
            dace.serialize.set_properties_from_json(ret,
                                                    json_obj,
                                                    context=context)
            return ret

    def expand(self, sdfg, state, *args, **kwargs) -> str:
        """ Create and perform the expansion transformation for this library
            node.
            :return: the name of the expanded implementation
        """
        implementation = self.implementation
        library_name = getattr(type(self), '_dace_library_name', '')
        try:
            if library_name:
                config_implementation = Config.get("library", library_name,
                                                   "default_implementation")
            else:
                config_implementation = None
        except KeyError:
            # Non-standard libraries are not defined in the config schema, and
            # thus might not exist in the config.
            config_implementation = None
        if config_implementation is not None:
            try:
                config_override = Config.get("library", library_name,
                                             "override")
                if config_override and implementation in self.implementations:
                    if implementation is not None:
                        warnings.warn(
                            "Overriding explicitly specified "
                            "implementation {} for {} with {}.".format(
                                implementation, self.label,
                                config_implementation))
                    implementation = config_implementation
            except KeyError:
                config_override = False
        # If not explicitly set, try the node default
        if implementation is None:
            implementation = type(self).default_implementation
            # If no node default, try library default
            if implementation is None:
                import dace.library  # Avoid cyclic dependency
                lib = dace.library._DACE_REGISTERED_LIBRARIES[type(
                    self)._dace_library_name]
                implementation = lib.default_implementation
                # Try the default specified in the config
                if implementation is None:
                    implementation = config_implementation
                    # Otherwise we don't know how to expand
                    if implementation is None:
                        raise ValueError("No implementation or default "
                                         "implementation specified.")
        if implementation not in self.implementations:
            raise KeyError("Unknown implementation for node {}: {}".format(
                type(self).__name__, implementation))
        transformation_type = type(self).implementations[implementation]
        sdfg_id = sdfg.sdfg_id
        state_id = sdfg.nodes().index(state)
        subgraph = {transformation_type._match_node: state.node_id(self)}
        transformation = transformation_type(sdfg_id, state_id, subgraph, 0)
        transformation.apply(sdfg, *args, **kwargs)
        return implementation

    @classmethod
    def register_implementation(cls, name, transformation_type):
        """Register an implementation to belong to this library node type."""
        cls.implementations[name] = transformation_type
        transformation_type._match_node = cls
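
The implementation-resolution order in `expand` can be summarized with a
plain-Python sketch (all names below are illustrative placeholders, not
DaCe API):

def resolve_implementation(node_impl, config_impl, config_override,
                           node_default, library_default, known):
    """Mirrors the precedence used by LibraryNode.expand above."""
    impl = node_impl
    # A configured override replaces even an explicitly set implementation
    if config_impl is not None and config_override and impl in known:
        impl = config_impl
    # Otherwise fall back: node-type default, then library default,
    # then the configured default
    if impl is None:
        impl = node_default or library_default or config_impl
    if impl is None:
        raise ValueError('No implementation or default implementation '
                         'specified.')
    if impl not in known:
        raise KeyError('Unknown implementation: %s' % impl)
    return impl

# Nothing set on the node, so the library default takes effect:
print(resolve_implementation(None, 'MKL', False, None, 'pure',
                             {'pure', 'MKL'}))  # pure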
Example 26
class SubgraphFusion(transformation.SubgraphTransformation):
    """ Implements the SubgraphFusion transformation.
        Fuses together the maps contained in the subgraph and pushes inner nodes
        into a global outer map, creating transients and new connections
        where necessary.

        SubgraphFusion requires all lowest scope level maps in the subgraph
        to have the same indices and parameter range in every dimension.
        This can be achieved using the MultiExpansion transformation first.
        Reductions can also be expanded using ReduceExpansion as a
        preprocessing step.

    """

    debug = Property(desc="Show debug info", dtype=bool, default=False)

    transient_allocation = Property(
        desc="Storage location to push transients that are fully "
        "contained within the subgraph to.",
        dtype=dtypes.StorageType,
        default=dtypes.StorageType.Default)

    @staticmethod
    def can_be_applied(sdfg: SDFG, subgraph: SubgraphView) -> bool:
        '''
        Fusible if:
        1. The maps have the same access sets and ranges, in order
        2. Any nodes in between two maps are AccessNodes only, without WCR.
           There is at most one AccessNode on any path between two maps;
           no other nodes are allowed
        3. The exiting memlets' subsets to an intermediate node must cover
           the respective incoming memlets' subsets into the next map
        '''
        # get graph
        graph = subgraph.graph
        for node in subgraph.nodes():
            if node not in graph.nodes():
                return False

        # next, get all the maps
        map_entries = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        map_exits = [graph.exit_node(map_entry) for map_entry in map_entries]
        maps = [map_entry.map for map_entry in map_entries]

        # 1. check whether all map ranges and indices are the same
        if len(maps) <= 1:
            return False
        base_map = maps[0]
        for map in maps:
            if map.get_param_num() != base_map.get_param_num():
                return False
            if not all(
                [p1 == p2 for (p1, p2) in zip(map.params, base_map.params)]):
                return False
            if not map.range == base_map.range:
                return False

        # 1.1 check whether all map entries have the same schedule
        schedule = map_entries[0].schedule
        if not all([entry.schedule == schedule for entry in map_entries]):
            return False

        # 2. check intermediate feasibility
        # see map_fusion.py for similar checks
        # we are being more relaxed here

        # 2.1 do some preparation work first:
        # calculate all out_nodes and intermediate_nodes
        # definition see in apply()
        intermediate_nodes = set()
        out_nodes = set()
        for map_entry, map_exit in zip(map_entries, map_exits):
            for edge in graph.out_edges(map_exit):
                current_node = edge.dst
                if len(graph.out_edges(current_node)) == 0:
                    out_nodes.add(current_node)
                else:
                    for dst_edge in graph.out_edges(current_node):
                        if dst_edge.dst in map_entries:
                            intermediate_nodes.add(current_node)
                        else:
                            out_nodes.add(current_node)

        # 2.2 topological feasibility:
        # For each intermediate and out node: must never reach any map
        # entry if it is not connected to map entry immediately
        visited = set()

        # for memoization purposes
        def visit_descendants(graph, node, visited, map_entries):
            # if we have already been at this node
            if node in visited:
                return True
            # reaching a map entry from here means it is not immediately
            # connected to the originating node -- not fusible
            if node in map_entries:
                return False
            # no need to memoize nodes without other in-connections
            if len(graph.in_edges(node)) > 1:
                visited.add(node)
            for oedge in graph.out_edges(node):
                if not visit_descendants(graph, oedge.dst, visited,
                                         map_entries):
                    return False
            return True

        for node in intermediate_nodes | out_nodes:
            # these nodes must not lead to a map entry
            nodes_to_check = set()
            for oedge in graph.out_edges(node):
                if oedge.dst not in map_entries:
                    nodes_to_check.add(oedge.dst)

            for forbidden_node in nodes_to_check:
                if not visit_descendants(graph, forbidden_node, visited,
                                         map_entries):
                    return False

        # 2.3 memlet feasibility
        # For each intermediate node, look at whether inner adjacent
        # memlets of the exiting map cover inner adjacent memlets
        # of the next entering map.
        # We also check for any WCRs on the fly.

        for node in intermediate_nodes:
            upper_subsets = set()
            lower_subsets = set()
            # First, determine which dimensions of the memlet ranges
            # change with the map, we do not need to care about the other dimensions.
            total_dims = len(sdfg.data(node.data).shape)
            dims_to_discard = SubgraphFusion.get_invariant_dimensions(
                sdfg, graph, map_entries, map_exits, node)

            # find upper_subsets
            for in_edge in graph.in_edges(node):
                # first check for WCRs
                if in_edge.data.wcr:
                    return False
                if in_edge.src in map_exits:
                    edge = graph.memlet_path(in_edge)[-2]
                    subset_to_add = dcpy(edge.data.subset\
                                         if edge.data.data == node.data\
                                         else edge.data.other_subset)
                    subset_to_add.pop(dims_to_discard)
                    upper_subsets.add(subset_to_add)
                else:
                    raise NotImplementedError("Nodes between two maps to be"
                                              "fused with *incoming* edges"
                                              "from outside the maps are not"
                                              "allowed yet.")

            # find lower_subsets
            for out_edge in graph.out_edges(node):
                if out_edge.dst in map_entries:
                    # cannot use the memlet tree here as there could be
                    # more than one succeeding map. Do it manually
                    for oedge in graph.out_edges(out_edge.dst):
                        if oedge.src_conn[3:] == out_edge.dst_conn[2:]:
                            subset_to_add = dcpy(oedge.data.subset \
                                                 if oedge.data.data == node.data \
                                                 else oedge.data.other_subset)
                            subset_to_add.pop(dims_to_discard)
                            lower_subsets.add(subset_to_add)

            upper_iter = iter(upper_subsets)
            union_upper = next(upper_iter)

            # TODO: add this check at a later point
            # We assume that upper_subsets for each data array
            # are contiguous
            # or do the full check if possible (intersection needed)
            '''
            # check whether subsets in upper_subsets are adjacent.
            # this is a requirement for the current implementation
            #try:
            # O(n^2*|dims|) but very small amount of subsets anyway
            try:
                for dim in range(total_dims - len(dims_to_discard)):
                    ordered_list = [(-1,-1,-1)]
                    for upper_subset in upper_subsets:
                        lo = upper_subset[dim][0]
                        hi = upper_subset[dim][1]
                        for idx,element in enumerate(ordered_list):
                            if element[0] <= lo and element[1] >= hi:
                                break
                            if element[0] > lo:
                                ordered_list.insert(idx, (lo,hi))
                    ordered_list.pop(0)


                    highest = ordered_list[0][1]
                    for i in range(len(ordered_list)):
                        if i < len(ordered_list)-1:
                            current_range = ordered_list[i]
                            if current_range[1] > highest:
                                highest = current_range[1]
                            next_range = ordered_list[i+1]
                            if highest < next_range[0] - 1:
                                return False
            except TypeError:
                #return False
            '''
            # FORNOW: just omit warning if unsure
            for lower_subset in lower_subsets:
                covers = False
                for upper_subset in upper_subsets:
                    if upper_subset.covers(lower_subset):
                        covers = True
                        break
                if not covers:
                    warnings.warn(
                        f"WARNING: For node {node}, please check assure that"
                        "incoming memlets cover outgoing ones. Ambiguous check (WIP)."
                    )

            # now take union of upper subsets
            for subs in upper_iter:
                union_upper = subsets.union(union_upper, subs)
                if not union_upper:
                    # something went wrong using union -- we'd rather abort
                    return False

            # finally check coverage
            for lower_subset in lower_subsets:
                if not union_upper.covers(lower_subset):
                    return False

        return True

    @staticmethod
    def get_invariant_dimensions(sdfg, graph, map_entries, map_exits, node):
        '''
        On a non-fused graph, returns the set of indices corresponding to
        array dimensions that do not change when entering the maps adjacent
        to a given access node.
        '''
        variate_dimensions = set()
        subset_length = -1

        for in_edge in graph.in_edges(node):
            if in_edge.src in map_exits:
                other_edge = graph.memlet_path(in_edge)[-2]
                other_subset = other_edge.data.subset \
                               if other_edge.data.data == node.data \
                               else other_edge.data.other_subset

                for (idx, (ssbs1, ssbs2)) \
                    in enumerate(zip(in_edge.data.subset, other_subset)):
                    if ssbs1 != ssbs2:
                        variate_dimensions.add(idx)
            else:
                raise NotImplementedError("Nodes between two maps to be"
                                          "fused with *incoming* edges"
                                          "from outside the maps are not"
                                          "allowed yet.")

            if subset_length < 0:
                subset_length = other_subset.dims()
            else:
                assert other_subset.dims() == subset_length

        for out_edge in graph.out_edges(node):
            if out_edge.dst in map_entries:
                for other_edge in graph.out_edges(out_edge.dst):
                    if other_edge.src_conn[3:] == out_edge.dst_conn[2:]:
                        other_subset = other_edge.data.subset \
                                       if other_edge.data.data == node.data \
                                       else other_edge.data.other_subset
                        for (idx, (ssbs1, ssbs2)) in enumerate(
                                zip(out_edge.data.subset, other_subset)):
                            if ssbs1 != ssbs2:
                                variate_dimensions.add(idx)
                        assert other_subset.dims() == subset_length

        invariant_dimensions = set(range(subset_length)) - variate_dimensions
        return invariant_dimensions

    def redirect_edge(self,
                      graph,
                      edge,
                      new_src=None,
                      new_src_conn=None,
                      new_dst=None,
                      new_dst_conn=None,
                      new_data=None):

        data = new_data if new_data else edge.data
        if new_src:
            ret = graph.add_edge(new_src, new_src_conn, edge.dst, edge.dst_conn,
                                 data)
            graph.remove_edge(edge)
        if new_dst:
            ret = graph.add_edge(edge.src, edge.src_conn, new_dst, new_dst_conn,
                                 data)
            graph.remove_edge(edge)

        return ret

    def prepare_intermediate_nodes(self,
                                   sdfg,
                                   graph,
                                   in_nodes,
                                   out_nodes,
                                   intermediate_nodes,
                                   map_entries,
                                   map_exits,
                                   do_not_override=None):
        ''' For every intermediate node, determines whether it is fully
        contained in the subgraph and whether it has any out connections,
        and thus whether transients need to be created.
        '''
        do_not_override = do_not_override or []

        def redirect(redirect_node, original_node):
            # redirect all outgoing traffic which
            # does not enter fusion scope again
            # from original_node to redirect_node
            # and then create a path from original_node to redirect_node.

            edges = list(graph.out_edges(original_node))
            for edge in edges:
                if edge.dst not in map_entries:
                    self.redirect_edge(graph, edge, new_src=redirect_node)

            graph.add_edge(original_node, None, redirect_node, None, Memlet())

        # first search whether intermediate_nodes appear outside of subgraph
        # and store it in dict
        data_counter = defaultdict(int)
        data_counter_subgraph = defaultdict(int)

        data_intermediate = set([node.data for node in intermediate_nodes])

        # do a full global search and count each data from each intermediate node
        scope_dict = graph.scope_dict()
        for state in sdfg.nodes():
            for node in state.nodes():
                if isinstance(
                        node,
                        nodes.AccessNode) and node.data in data_intermediate:
                    # add them to the counter set in all cases
                    data_counter[node.data] += 1
                    # see whether we are inside the subgraph scope
                    # if so, add to data_counter_subgraph
                    # DO NOT add if it is in out_nodes
                    if state == graph and \
                       (node in intermediate_nodes or scope_dict[node] in map_entries):
                        data_counter_subgraph[node.data] += 1

        # next up: If intermediate_counter and global counter match and if the array
        # is declared transient, it is fully contained by the subgraph

        subgraph_contains_data = {data: data_counter[data] == data_counter_subgraph[data] \
                                        and sdfg.data(data).transient \
                                        and data not in do_not_override \
                                  for data in data_intermediate}

        transients_created = {}
        for node in intermediate_nodes & out_nodes:
            # create new transient at exit replacing the array
            # and redirect all traffic
            data_ref = sdfg.data(node.data)
            out_trans_data_name = node.data + '_OUT'
            data_trans = sdfg.add_transient(name=out_trans_data_name,
                                            shape=dcpy(data_ref.shape),
                                            dtype=dcpy(data_ref.dtype),
                                            storage=dcpy(data_ref.storage),
                                            offset=dcpy(data_ref.offset))
            node_trans = graph.add_access(out_trans_data_name)
            if node.setzero:
                node_trans.setzero = True
            redirect(node_trans, node)
            transients_created[node] = node_trans

        # finally, create a dict that, for every array for which
        # subgraph_contains_data is true, lists its invariant axes.
        invariant_dimensions = {}
        for node in intermediate_nodes:
            if subgraph_contains_data[node.data]:
                # only need to check in this case
                # else the array doesn't get modified and we don't
                # need invariant dimensions
                data = node.data
                inv_dims = SubgraphFusion.get_invariant_dimensions(
                    sdfg, graph, map_entries, map_exits, node)
                if data in invariant_dimensions:
                    # do a check -- we want the same result for each
                    # node containing the same data
                    if inv_dims != invariant_dimensions[data]:
                        warnings.warn(
                            f"WARNING: Data dimensions that are not "
                            f"propagated through differ across multiple "
                            f"instances of access nodes for data "
                            f"{node.data}. Please check whether all memlets "
                            f"to AccessNodes containing this data are sound.")
                        invariant_dimensions[data] |= inv_dims
                else:
                    invariant_dimensions[data] = inv_dims

        return (subgraph_contains_data, transients_created,
                invariant_dimensions)

    def apply(self, sdfg, do_not_override=None, **kwargs):
        subgraph = self.subgraph_view(sdfg)
        graph = subgraph.graph

        map_entries = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        self.fuse(sdfg, graph, map_entries, do_not_override, **kwargs)

    def fuse(self, sdfg, graph, map_entries, do_not_override=None, **kwargs):
        """ takes the map_entries specified and tries to fuse maps.

            all maps have to be extended into outer and inner map
            (use MapExpansion as a pre-pass)

            Arrays that don't exist outside the subgraph get pushed
            into the map and their data dimension gets cropped.
            Otherwise the original array is taken.

            For every output respective connections are crated automatically.

            :param sdfg: SDFG
            :param graph: State
            :param map_entries: Map Entries (class MapEntry) of the outer maps
                                which we want to fuse
            :param do_not_override: List of data names whose corresponding nodes
                                    are fully contained within the subgraph
                                    but should not be augmented/transformed
                                    nevertheless.
        """

        # if there are no maps, return immediately
        if len(map_entries) == 0:
            return

        do_not_override = do_not_override or []

        # get maps and map exits
        maps = [map_entry.map for map_entry in map_entries]
        map_exits = [graph.exit_node(map_entry) for map_entry in map_entries]

        # Nodes that flow data into one or more maps, but to which no data
        # flows from any map
        in_nodes = set()

        # Nodes into which data flows from a map, but from which no data
        # flows into any map
        out_nodes = set()

        # Nodes that act as intermediate nodes - data flows from a map into
        # them, and there is an outgoing path into another map
        intermediate_nodes = set()

        ### NOTE:
        #- in_nodes, out_nodes, intermediate_nodes refer to the configuration of the final fused map
        #- in_nodes and out_nodes are trivially disjoint
        #- Intermediate_nodes and out_nodes are not necessarily disjoint
        #- Intermediate_nodes and in_nodes are disjoint by design.
        #  There could be a node that has both incoming edges from a map exit
        #  and from outside, but it is just treated as intermediate_node and handled
        #  automatically.

        for map_entry, map_exit in zip(map_entries, map_exits):
            for edge in graph.in_edges(map_entry):
                in_nodes.add(edge.src)
            for edge in graph.out_edges(map_exit):
                current_node = edge.dst
                if len(graph.out_edges(current_node)) == 0:
                    out_nodes.add(current_node)
                else:
                    for dst_edge in graph.out_edges(current_node):
                        if dst_edge.dst in map_entries:
                            # add to intermediate_nodes
                            intermediate_nodes.add(current_node)

                        else:
                            # add to out_nodes
                            out_nodes.add(current_node)
                for e in graph.in_edges(current_node):
                    if e.src not in map_exits:
                        raise NotImplementedError(
                            "Nodes between two maps to be "
                            "fused with *incoming* edges "
                            "from outside the maps are not "
                            "allowed yet.")

        # any intermediate_nodes currently in in_nodes shouldn't be there
        in_nodes -= intermediate_nodes

        if self.debug:
            print("SubgraphFusion::In_nodes", in_nodes)
            print("SubgraphFusion::Out_nodes", out_nodes)
            print("SubgraphFusion::Intermediate_nodes", intermediate_nodes)

        # all maps are assumed to have the same params and range in order
        global_map = nodes.Map(label="outer_fused",
                               params=maps[0].params,
                               ndrange=maps[0].range)
        global_map_entry = nodes.MapEntry(global_map)
        global_map_exit = nodes.MapExit(global_map)

        schedule = map_entries[0].schedule
        global_map_entry.schedule = schedule
        graph.add_node(global_map_entry)
        graph.add_node(global_map_exit)

        # next up, for any intermediate node, find whether it only appears
        # in the subgraph or also somewhere else / as an input
        # create new transients for nodes that are in out_nodes and
        # intermediate_nodes simultaneously
        # also check which dimensions of each transient data element correspond
        # to map axes and write this information into a dict.
        node_info = self.prepare_intermediate_nodes(sdfg, graph, in_nodes, out_nodes, \
                                                    intermediate_nodes,\
                                                    map_entries, map_exits, \
                                                    do_not_override)

        (subgraph_contains_data, transients_created,
         invariant_dimensions) = node_info
        if self.debug:
            print(
                "SubgraphFusion:: {Intermediate_node: subgraph_contains_data} dict"
            )
            print(subgraph_contains_data)

        # Dict for saving incoming nodes and their assigned connectors
        # Format: {access_node: (edge, in_conn, out_conn)}
        inconnectors_dict = {}

        for map_entry, map_exit in zip(map_entries, map_exits):
            # handle inputs
            # TODO: dynamic map range -- this is fairly unrealistic in such a setting
            for edge in graph.in_edges(map_entry):
                src = edge.src
                mmt = graph.memlet_tree(edge)
                out_edges = [child.edge for child in mmt.root().children]

                if src in in_nodes:
                    in_conn = None
                    out_conn = None
                    if src in inconnectors_dict:
                        # no need to augment subset of outer edge.
                        # will do this at the end in one pass.

                        in_conn = inconnectors_dict[src][1]
                        out_conn = inconnectors_dict[src][2]
                        graph.remove_edge(edge)

                    else:
                        next_conn = global_map_entry.next_connector()
                        in_conn = 'IN_' + next_conn
                        out_conn = 'OUT_' + next_conn
                        global_map_entry.add_in_connector(in_conn)
                        global_map_entry.add_out_connector(out_conn)

                        inconnectors_dict[src] = (edge, in_conn, out_conn)

                        # reroute in edge via global_map_entry
                        self.redirect_edge(graph, edge, new_dst = global_map_entry, \
                                                        new_dst_conn = in_conn)

                    # map out edges to new map
                    for out_edge in out_edges:
                        self.redirect_edge(graph, out_edge, new_src = global_map_entry, \
                                                            new_src_conn = out_conn)

                else:
                    # connect directly
                    for out_edge in out_edges:
                        mm = dcpy(out_edge.data)
                        self.redirect_edge(graph,
                                           out_edge,
                                           new_src=src,
                                           new_data=mm)

                    graph.remove_edge(edge)

            for edge in graph.out_edges(map_entry):
                # special case: for nodes that have no data connections
                if not edge.src_conn:
                    self.redirect_edge(graph, edge, new_src=global_map_entry)

            ######################################

            for edge in graph.in_edges(map_exit):
                if not edge.dst_conn:
                    # no destination connector, path ends here.
                    self.redirect_edge(graph, edge, new_dst=global_map_exit)
                    continue
                # find corresponding out_edges for current edge, cannot use mmt anymore
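                # ('OUT_<n>'[3:] == '_<n>' == 'IN_<n>'[2:], so the slices
                # below match the numbered IN/OUT connector pairs)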
                out_edges = [
                    oedge for oedge in graph.out_edges(map_exit)
                    if oedge.src_conn[3:] == edge.dst_conn[2:]
                ]

                # Tuple to store in/out connector port that might be created
                port_created = None

                for out_edge in out_edges:
                    dst = out_edge.dst

                    if dst in intermediate_nodes & out_nodes:

                        # create connection through global map from
                        # dst to dst_transient that was created
                        dst_transient = transients_created[dst]
                        next_conn = global_map_exit.next_connector()
                        in_conn = 'IN_' + next_conn
                        out_conn = 'OUT_' + next_conn
                        global_map_exit.add_in_connector(in_conn)
                        global_map_exit.add_out_connector(out_conn)

                        inner_memlet = dcpy(edge.data)
                        inner_memlet.other_subset = dcpy(edge.data.subset)

                        e_inner = graph.add_edge(dst, None, global_map_exit,
                                                 in_conn, inner_memlet)
                        mm_outer = propagate_memlet(graph, inner_memlet, global_map_entry, \
                                                    union_inner_edges = False)

                        e_outer = graph.add_edge(global_map_exit, out_conn,
                                                 dst_transient, None, mm_outer)

                        # remove the edge from dst to dst_transient that was
                        # created during intermediate-node preparation.
                        removed = False
                        for e in graph.out_edges(dst):
                            if e.dst == dst_transient:
                                graph.remove_edge(e)
                                removed = True
                                break

                        if self.debug:
                            assert removed

                    # handle separately: intermediate_nodes and pure out nodes
                    # case 1: intermediate_nodes: can just redirect edge
                    if dst in intermediate_nodes:
                        self.redirect_edge(graph,
                                           out_edge,
                                           new_src=edge.src,
                                           new_src_conn=edge.src_conn,
                                           new_data=dcpy(edge.data))

                    # case 2: pure out node: connect to outer array node
                    if dst in (out_nodes - intermediate_nodes):
                        if edge.dst != global_map_exit:
                            next_conn = global_map_exit.next_connector()
                            in_conn = 'IN_' + next_conn
                            out_conn = 'OUT_' + next_conn
                            global_map_exit.add_in_connector(in_conn)
                            global_map_exit.add_out_connector(out_conn)
                            self.redirect_edge(graph,
                                               edge,
                                               new_dst=global_map_exit,
                                               new_dst_conn=in_conn)
                            port_created = (in_conn, out_conn)
                            #edge.dst = global_map_exit
                            #edge.dst_conn = in_conn

                        else:
                            # edge already points to the global map exit:
                            # reuse the connector pair implied by its name
                            conn_nr = edge.dst_conn[3:]
                            in_conn = 'IN_' + conn_nr
                            out_conn = 'OUT_' + conn_nr

                        # connect the global map exit to the out node
                        graph.add_edge(global_map_exit, out_conn, dst, None,
                                       dcpy(out_edge.data))
                        graph.remove_edge(out_edge)

                # remove the edge if it has not been used by any pure out node
                if not port_created:
                    graph.remove_edge(edge)

            # maps are now ready to be discarded
            graph.remove_node(map_entry)
            graph.remove_node(map_exit)

            # end main loop.

        # create a mapping from data arrays to offsets
        # for later memlet adjustments
        min_offsets = dict()

        # do one pass to augment all transient arrays
        data_intermediate = {node.data for node in intermediate_nodes}
        for data_name in data_intermediate:
            if subgraph_contains_data[data_name]:
                all_nodes = [
                    n for n in intermediate_nodes if n.data == data_name
                ]
                in_edges = list(chain(*(graph.in_edges(n) for n in all_nodes)))

                # seed the union with the first in-edge, then fold in the rest
                in_edges_iter = iter(in_edges)
                in_edge = next(in_edges_iter)
                target_subset = dcpy(in_edge.data.subset)
                target_subset.pop(invariant_dimensions[data_name])
                for in_edge in in_edges_iter:
                    target_subset_curr = dcpy(in_edge.data.subset)
                    target_subset_curr.pop(invariant_dimensions[data_name])
                    target_subset = subsets.union(target_subset,
                                                  target_subset_curr)

                min_offsets_cropped = target_subset.min_element_approx()
                # calculate the new transient array size.
                target_subset.offset(min_offsets_cropped, True)

                # re-add invariant dimensions with offset 0 and save to min_offsets
                min_offset = []
                index = 0
                for i in range(len(sdfg.data(data_name).shape)):
                    if i in invariant_dimensions[data_name]:
                        min_offset.append(0)
                    else:
                        min_offset.append(min_offsets_cropped[index])
                        index += 1

                min_offsets[data_name] = min_offset

                # determine the shape of the new array.
                new_data_shape = []
                index = 0
                for i, sz in enumerate(sdfg.data(data_name).shape):
                    if i in invariant_dimensions[data_name]:
                        new_data_shape.append(sz)
                    else:
                        new_data_shape.append(target_subset.size()[index])
                        index += 1

                new_data_strides = [
                    data._prod(new_data_shape[i + 1:])
                    for i in range(len(new_data_shape))
                ]

                new_data_totalsize = data._prod(new_data_shape)
                new_data_offset = [0] * len(new_data_shape)
                # augment.
                transient_to_transform = sdfg.data(data_name)
                transient_to_transform.shape = new_data_shape
                transient_to_transform.strides = new_data_strides
                transient_to_transform.total_size = new_data_totalsize
                transient_to_transform.offset = new_data_offset
                transient_to_transform.lifetime = dtypes.AllocationLifetime.Scope
                transient_to_transform.storage = self.transient_allocation

            else:
                # don't modify data container - array is needed outside
                # of subgraph.

                # hack: set the lifetime to State if the allocation has only
                # been Scope so far, to avoid allocation issues
                if sdfg.data(
                        data_name).lifetime == dtypes.AllocationLifetime.Scope:
                    sdfg.data(
                        data_name).lifetime = dtypes.AllocationLifetime.State

        # do one pass to adjust the memlets of the in-between transients
        for node in intermediate_nodes:
            # all incoming edges to node
            in_edges = graph.in_edges(node)
            # outgoing edges going to another fused part
            inter_edges = []
            # outgoing edges that exit global map
            out_edges = []
            for e in graph.out_edges(node):
                if e.dst == global_map_exit:
                    out_edges.append(e)
                else:
                    inter_edges.append(e)

            # offset memlets where necessary
            if subgraph_contains_data[node.data]:
                # get min_offset
                min_offset = min_offsets[node.data]
                # offset all incoming memlets (min_offset already contains
                # 0 at the invariant dimensions)
                for iedge in in_edges:
                    for edge in graph.memlet_tree(iedge):
                        if edge.data.data == node.data:
                            edge.data.subset.offset(min_offset, True)
                        elif edge.data.other_subset:
                            edge.data.other_subset.offset(min_offset, True)

                for cedge in inter_edges:
                    for edge in graph.memlet_tree(cedge):
                        if edge.data.data == node.data:
                            edge.data.subset.offset(min_offset, True)
                        elif edge.data.other_subset:
                            edge.data.other_subset.offset(min_offset, True)

                # if in_edges has several entries:
                # put other_subset into out_edges for correctness
                if len(in_edges) > 1:
                    for oedge in out_edges:
                        oedge.data.other_subset = dcpy(oedge.data.subset)
                        oedge.data.other_subset.offset(min_offset, True)

            # also correct memlets of created transient
            if node in transients_created:
                transient_in_edges = graph.in_edges(transients_created[node])
                transient_out_edges = graph.out_edges(transients_created[node])
                for edge in chain(transient_in_edges, transient_out_edges):
                    for e in graph.memlet_tree(edge):
                        if e.data.data == node.data:
                            e.data.data += '_OUT'

        # do one last pass to correct outside memlets adjacent to global map
        for out_connector in global_map_entry.out_connectors:
            # find corresponding in_connector
            # and the in-connecting edge
            in_connector = 'IN' + out_connector[3:]
            for iedge in graph.in_edges(global_map_entry):
                if iedge.dst_conn == in_connector:
                    in_edge = iedge

            # find corresponding out_connector
            # and all out-connecting edges that belong to it
            # count them
            oedge_counter = 0
            for oedge in graph.out_edges(global_map_entry):
                if oedge.src_conn == out_connector:
                    out_edge = oedge
                    oedge_counter += 1

            # do memlet propagation only if there are several out edges;
            # otherwise there is no need

            if oedge_counter > 1:
                memlet_out = propagate_memlet(dfg_state=graph,
                                              memlet=out_edge.data,
                                              scope_node=global_map_entry,
                                              union_inner_edges=True)
                # override number of accesses
                in_edge.data.volume = memlet_out.volume
                in_edge.data.subset = memlet_out.subset

        # create a hook for outside access to global_map
        self._global_map_entry = global_map_entry
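
For orientation, the following is a minimal usage sketch of driving the fusion above through the subgraph-transformation interface. It is hedged: the `two_maps` program, the state selection, and the expectation that `can_be_applied` succeeds are illustrative assumptions, since the exact maps depend on what the frontend generates.

import dace
from dace.sdfg.graph import SubgraphView
from dace.transformation.subgraph import SubgraphFusion

N = dace.symbol('N')

@dace.program
def two_maps(A: dace.float64[N], B: dace.float64[N], C: dace.float64[N]):
    tmp = A + B       # first elementwise map, writes a transient
    C[:] = tmp * 2.0  # second elementwise map, reads the transient

sdfg = two_maps.to_sdfg()
state = sdfg.nodes()[0]  # assumption: both maps live in the first state
subgraph = SubgraphView(state, state.nodes())
if SubgraphFusion.can_be_applied(sdfg, subgraph):
    SubgraphFusion(subgraph).apply(sdfg)  # runs the fusion logic above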
Example no. 27
class Tasklet(CodeNode):
    """ A node that contains a tasklet: a functional computation procedure
        that can only access external data specified using connectors.

        Tasklets may be implemented in Python, C++, or any supported
        language by the code generator.
    """

    code = CodeProperty(desc="Tasklet code", default=CodeBlock(""))
    debuginfo = DebugInfoProperty()

    instrument = Property(
        choices=dtypes.InstrumentationType,
        desc="Measure execution statistics with given method",
        default=dtypes.InstrumentationType.No_Instrumentation)

    def __init__(self,
                 label,
                 inputs=None,
                 outputs=None,
                 code="",
                 language=dtypes.Language.Python,
                 location=None,
                 debuginfo=None):
        super(Tasklet, self).__init__(label, location, inputs, outputs)

        self.code = CodeBlock(code, language)
        self.debuginfo = debuginfo

    @property
    def language(self):
        return self.code.language

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Tasklet("dummylabel")
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        return ret

    @property
    def name(self):
        return self._label

    def validate(self, sdfg, state):
        if not dtypes.validate_name(self.label):
            raise NameError('Invalid tasklet name "%s"' % self.label)
        for in_conn in self.in_connectors:
            if not dtypes.validate_name(in_conn):
                raise NameError('Invalid input connector "%s"' % in_conn)
        for out_conn in self.out_connectors:
            if not dtypes.validate_name(out_conn):
                raise NameError('Invalid output connector "%s"' % out_conn)

    @property
    def free_symbols(self) -> Set[str]:
        return self.code.get_free_symbols(self.in_connectors.keys()
                                          | self.out_connectors.keys())

    def infer_connector_types(self, sdfg, state):
        # If a Python tasklet, use type inference to figure out all None output
        # connectors
        if all(cval.type is not None for cval in self.out_connectors.values()):
            return
        if self.code.language != dtypes.Language.Python:
            return

        if any(cval.type is None for cval in self.in_connectors.values()):
            raise TypeError('Cannot infer output connectors of tasklet "%s", '
                            'not all input connectors have types' % str(self))

        # Avoid import loop
        from dace.codegen.tools.type_inference import infer_types

        # Get symbols defined at beginning of node, and infer all types in
        # tasklet
        syms = state.symbols_defined_at(self)
        syms.update(self.in_connectors)
        new_syms = infer_types(self.code.code, syms)
        for cname, oconn in self.out_connectors.items():
            if oconn.type is None:
                if cname not in new_syms:
                    raise TypeError('Cannot infer type of tasklet %s output '
                                    '"%s", please specify manually.' %
                                    (self.label, cname))
                self.out_connectors[cname] = new_syms[cname]

    def __str__(self):
        if not self.label:
            return "--Empty--"
        else:
            return self.label
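
Below is a short, hedged sketch of constructing a Tasklet through the state API rather than directly; `add_tasklet` is the conventional entry point, and the array names, connectors, and memlets here are illustrative assumptions.

import dace

sdfg = dace.SDFG('tasklet_demo')
sdfg.add_array('A', [1], dace.float64)
sdfg.add_array('B', [1], dace.float64)
state = sdfg.add_state()

# build a Tasklet with one input connector 'a' and one output connector 'b'
t = state.add_tasklet('scale', {'a'}, {'b'}, 'b = a * 2')
r = state.add_read('A')
w = state.add_write('B')
state.add_edge(r, None, t, 'a', dace.Memlet.simple('A', '0'))
state.add_edge(t, 'b', w, None, dace.Memlet.simple('B', '0'))

t.validate(sdfg, state)  # checks label and connector names, as defined above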
Example no. 28
class Array(Data):
    """ Array/constant descriptor (dimensions, type and other properties). """

    # Properties
    allow_conflicts = Property(dtype=bool)
    # TODO: Should we use a Code property here?
    materialize_func = Property(dtype=str,
                                allow_none=True,
                                setter=set_materialize_func)
    access_order = Property(dtype=tuple)
    strides = Property(dtype=list)
    offset = Property(dtype=list)
    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in '
                         'the same function')

    def __init__(self,
                 dtype,
                 shape,
                 materialize_func=None,
                 transient=False,
                 allow_conflicts=False,
                 storage=dace.types.StorageType.Default,
                 location='',
                 access_order=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 toplevel=False,
                 debuginfo=None):

        super(Array, self).__init__(dtype, shape, transient, storage, location,
                                    toplevel, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.materialize_func = materialize_func
        self.may_alias = may_alias

        if access_order is not None:
            self.access_order = cp.copy(access_order)
        else:
            self.access_order = tuple(i for i in range(len(shape)))

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = cp.copy(list(shape))

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return 'Array (dtype=%s, shape=%s)' % (self.dtype, self.shape)

    def clone(self):
        return Array(self.dtype, self.shape, self.materialize_func,
                     self.transient, self.allow_conflicts, self.storage,
                     self.location, self.access_order, self.strides,
                     self.offset, self.may_alias, self.toplevel,
                     self.debuginfo)

    def validate(self):
        super(Array, self).validate()
        if len(self.access_order) != len(self.shape):
            raise TypeError('Access order must be the same size as shape')

        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic))
               for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer '
                            'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sympy.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sympy.Symbol(str(s), **s.assumptions0)
                else:
                    s = sympy.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sympy.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sympy.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sympy.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0),
                #      'If this expression is false, please refine symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s),
                #      'If this expression is false, please refine symbol definitions in the program.')

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Array):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # If both are symbols, ensure equality
            if symbolic.issymbolic(dim) and symbolic.issymbolic(otherdim):
                if dim != otherdim:
                    return False

            # If one is a symbol and the other is a constant
            # make sure they are equivalent
            elif symbolic.issymbolic(otherdim):
                if symbolic.eval(otherdim) != dim:
                    return False
            elif symbolic.issymbolic(dim):
                if symbolic.eval(dim) != otherdim:
                    return False
            else:
                # Any other case (constant vs. constant), check for equality
                if otherdim != dim:
                    return False
        return True

    def signature(self, with_types=True, for_call=False, name=None):
        arrname = name
        if self.materialize_func is not None:
            if for_call:
                return 'nullptr'
            if not with_types:
                return arrname
            arrname = '/* ' + arrname + ' (immaterial) */'

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]
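
A hedged sketch of the constructor defaults above, using the Array class as defined here (note that this older constructor defaults strides to a copy of the shape rather than computing row-major strides):

import dace

desc = Array(dace.float64, [32, 32])
print(desc.access_order)  # (0, 1): identity order by default
print(desc.strides)       # [32, 32]: a copy of the shape, per __init__ above
print(desc.offset)        # [0, 0]
print(desc.sizes())       # ['32', '32']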
Example no. 29
class Map(object):
    """ A Map is a two-node representation of parametric graphs, containing
        an integer set by which the contents (nodes dominated by an entry
        node and post-dominated by an exit node) are replicated.

        Maps contain a `schedule` property, which specifies how the scope
        should be scheduled (execution order). Code generators can use the
        schedule property to generate appropriate code, e.g., GPU kernels.
    """

    # List of (editable) properties
    label = Property(dtype=str, desc="Label of the map")
    params = ListProperty(element_type=str, desc="Mapped parameters")
    range = RangeProperty(desc="Ranges of map parameters",
                          default=sbs.Range([]))
    schedule = Property(dtype=dtypes.ScheduleType,
                        desc="Map schedule",
                        choices=dtypes.ScheduleType,
                        from_string=lambda x: dtypes.ScheduleType[x],
                        default=dtypes.ScheduleType.Default)
    unroll = Property(dtype=bool, desc="Map unrolling")
    collapse = Property(dtype=int,
                        default=1,
                        desc="How many dimensions to"
                        " collapse into the parallel range")
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    instrument = Property(
        choices=dtypes.InstrumentationType,
        desc="Measure execution statistics with given method",
        default=dtypes.InstrumentationType.No_Instrumentation)

    def __init__(self,
                 label,
                 params,
                 ndrange,
                 schedule=dtypes.ScheduleType.Default,
                 unroll=False,
                 collapse=1,
                 fence_instrumentation=False,
                 debuginfo=None):
        super(Map, self).__init__()

        # Assign properties
        self.label = label
        self.schedule = schedule
        self.unroll = unroll
        self.collapse = collapse
        self.params = params
        self.range = ndrange
        self.debuginfo = debuginfo
        self._fence_instrumentation = fence_instrumentation

    def __str__(self):
        return self.label + "[" + ", ".join([
            "{}={}".format(i, r)
            for i, r in zip(self._params,
                            [sbs.Range.dim_to_string(d) for d in self._range])
        ]) + "]"

    def validate(self, sdfg, state, node):
        if not dtypes.validate_name(self.label):
            raise NameError('Invalid map name "%s"' % self.label)

    def get_param_num(self):
        """ Returns the number of map dimension parameters/symbols. """
        return len(self.params)
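
A hedged construction sketch, mirroring how SubgraphFusion builds its fused outer map earlier in this listing. Import paths follow recent DaCe layouts (older releases expose the same classes under dace.graph.nodes):

from dace import subsets
from dace.sdfg import nodes

m = nodes.Map(label='tile', params=['i', 'j'],
              ndrange=subsets.Range([(0, 31, 1), (0, 31, 1)]))
entry, exit_node = nodes.MapEntry(m), nodes.MapExit(m)
print(m)                  # e.g. tile[i=0:32, j=0:32]
print(m.get_param_num())  # 2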
Example no. 30
class MultiExpansion(transformation.SubgraphTransformation):
    '''
    Implements the MultiExpansion transformation.
    Takes the lowest-scope maps in a given subgraph and splits each of
    them into an outer and an inner map, where the outer map contains
    the ranges common to all maps and the inner map the rest.
    Map access variables and memlets are changed accordingly.
    '''

    debug = Property(dtype=bool, desc="Debug Mode", default=False)
    sequential_innermaps = Property(dtype=bool,
                                    desc="Make all inner maps that are "
                                    "created during expansion sequential",
                                    default=False)

    @staticmethod
    def can_be_applied(sdfg: SDFG, subgraph: SubgraphView) -> bool:
        ### get lowest scope maps of subgraph
        # check whether all nodes are in the same graph
        # (or nested SDFGs therein)

        graph = subgraph.graph

        for node in subgraph.nodes():
            if node not in graph.nodes():
                return False

        # next, get all the maps
        maps = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        brng = helpers.common_map_base_ranges(maps)

        # if at most one map was found -> fail
        if len(maps) <= 1:
            return False

        # see whether they have common parameters; if not -> fail
        if len(brng) == 0:
            return False

        return True

    def apply(self, sdfg, map_base_variables=None):
        # get lowest scope map entries and expand
        subgraph = self.subgraph_view(sdfg)
        graph = subgraph.graph

        # next, get all the base maps and expand
        maps = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        self.expand(sdfg, graph, maps, map_base_variables=map_base_variables)

    def expand(self, sdfg, graph, map_entries, map_base_variables=None):
        """
        Expansion into outer and inner maps for each map in a specified set.
        The resulting outer maps all have same range and indices, corresponding
        variables and memlets get changed accordingly. The inner map contains
        the leftover dimensions
        :param sdfg: Underlying SDFG
        :param graph: Graph in which we expand
        :param map_entries: List of Map Entries(Type MapEntry) that we want to expand
        :param map_base_variables: Optional parameter. List of strings
                                   If None, then expand() searches for the maximal amount
                                   of equal map ranges and pushes those and their corresponding
                                   loop variables into the outer loop.
                                   If specified, then expand() pushes the ranges belonging
                                   to the loop iteration variables specified into the outer loop
                                   (For instance map_base_variables = ['i','j'] assumes that
                                   all maps have common iteration indices i and j with corresponding
                                   correct ranges)
        """

        maps = [entry.map for entry in map_entries]

        if not map_base_variables:
            # find the maximal subset of variables to expand;
            # greedy if there exist multiple equal ranges in a map

            map_base_ranges = helpers.common_map_base_ranges(maps)
            reassignments = helpers.find_reassignment(maps, map_base_ranges)

            ##### first, regroup and reassign
            # create a params_dict for every map.
            # define the outer iteration variable names by taking the first
            # map's parameters at the common ranges
            map_base_variables = []
            for rng in map_base_ranges:
                for i in range(len(maps[0].params)):
                    if maps[0].range[i] == rng and maps[0].params[
                            i] not in map_base_variables:
                        map_base_variables.append(maps[0].params[i])
                        break

            params_dict = {}
            if self.debug:
                print("MultiExpansion::Map_base_variables:",
                      map_base_variables)
                print("MultiExpansion::Map_base_ranges:", map_base_ranges)
            for map in maps:
                # for each map create param dict, first assign identity
                params_dict_map = {param: param for param in map.params}
                # now look for the correct reassignment:
                # for every element != -1, the param must be changed to the
                # corresponding map_base_variables entry.
                # if that variable already appears in the map's own dict, do
                # a swap; else just replace it
                for i, reassignment in enumerate(reassignments[map]):
                    if reassignment == -1:
                        # nothing to do
                        pass
                    else:
                        current_var = map.params[i]
                        current_assignment = params_dict_map[current_var]
                        target_assignment = map_base_variables[reassignment]
                        if current_assignment != target_assignment:
                            if target_assignment in params_dict_map.values():
                                # do a swap
                                key1 = current_var
                                for key, value in params_dict_map.items():
                                    if value == target_assignment:
                                        key2 = key

                                value1 = params_dict_map[key1]
                                value2 = params_dict_map[key2]
                                # swap the two assigned values
                                params_dict_map[key1] = value2
                                params_dict_map[key2] = value1
                            else:
                                # just reassign
                                params_dict_map[
                                    current_var] = target_assignment

                # done, assign params_dict_map to the global one
                params_dict[map] = params_dict_map

            for map, map_entry in zip(maps, map_entries):
                map_scope = graph.scope_subgraph(map_entry)
                params_dict_map = params_dict[map]
                for firstp, secondp in params_dict_map.items():
                    if firstp != secondp:
                        replace(map_scope, firstp, '__' + firstp + '_fused')
                for firstp, secondp in params_dict_map.items():
                    if firstp != secondp:
                        replace(map_scope, '__' + firstp + '_fused', secondp)

                # now also replace the map variables inside maps
                for i in range(len(map.params)):
                    map.params[i] = params_dict_map[map.params[i]]

            if self.debug:
                print("MultiExpansion::Params replaced")

        else:
            # just calculate map_base_ranges
            # do a check whether all maps correct
            map_base_ranges = []

            map0 = maps[0]
            for var in map_base_variables:
                index = map0.params.index(var)
                map_base_ranges.append(map0.range[index])

            for map in maps:
                for var, rng in zip(map_base_variables, map_base_ranges):
                    assert map.range[map.params.index(var)] == rng

        # then expand all the maps
        for map, map_entry in zip(maps, map_entries):
            if map.get_param_num() == len(map_base_variables):
                # nothing to expand, continue
                continue

            map_exit = graph.exit_node(map_entry)
            # create two new maps, outer and inner
            params_outer = map_base_variables
            ranges_outer = map_base_ranges

            init_params_inner = []
            init_ranges_inner = []
            for param, rng in zip(map.params, map.range):
                if param in map_base_variables:
                    continue
                else:
                    init_params_inner.append(param)
                    init_ranges_inner.append(rng)

            params_inner = init_params_inner
            ranges_inner = subsets.Range(init_ranges_inner)
            inner_map = nodes.Map(label = map.label + '_inner',
                                  params = params_inner,
                                  ndrange = ranges_inner,
                                  schedule = dtypes.ScheduleType.Sequential \
                                             if self.sequential_innermaps \
                                             else dtypes.ScheduleType.Default)

            map.label = map.label + '_outer'
            map.params = params_outer
            map.range = ranges_outer

            # create new map entries and exits
            map_entry_inner = nodes.MapEntry(inner_map)
            map_exit_inner = nodes.MapExit(inner_map)

            # analogously to Map_Expansion
            for edge in graph.out_edges(map_entry):
                graph.remove_edge(edge)
                graph.add_memlet_path(map_entry,
                                      map_entry_inner,
                                      edge.dst,
                                      src_conn=edge.src_conn,
                                      memlet=edge.data,
                                      dst_conn=edge.dst_conn)

            dynamic_edges = dynamic_map_inputs(graph, map_entry)
            for edge in dynamic_edges:
                # Remove old edge and connector
                graph.remove_edge(edge)
                edge.dst.remove_in_connector(edge.dst_conn)

                # Propagate to each range it belongs to
                path = []
                for mapnode in [map_entry, map_entry_inner]:
                    path.append(mapnode)
                    # note: the builtin `map` is shadowed by the loop variable
                    # above, so use a comprehension instead of map(str, ...)
                    if any(edge.dst_conn in [str(s)
                                             for s in symbolic.symlist(r)]
                           for r in mapnode.map.range):
                        graph.add_memlet_path(edge.src,
                                              *path,
                                              memlet=edge.data,
                                              src_conn=edge.src_conn,
                                              dst_conn=edge.dst_conn)

            for edge in graph.in_edges(map_exit):
                graph.remove_edge(edge)
                graph.add_memlet_path(edge.src,
                                      map_exit_inner,
                                      map_exit,
                                      memlet=edge.data,
                                      src_conn=edge.src_conn,
                                      dst_conn=edge.dst_conn)
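
Finally, a hedged end-to-end sketch: MultiExpansion is typically run before SubgraphFusion, so that maps of different shapes first share a common outer map. The `prog` program, the state selection, and the applicability of both transformations are assumptions for illustration.

import dace
from dace.sdfg.graph import SubgraphView
from dace.transformation.subgraph import MultiExpansion, SubgraphFusion

N = dace.symbol('N')
M = dace.symbol('M')

@dace.program
def prog(A: dace.float64[N, M], B: dace.float64[N, M]):
    A[:] = A + 1.0  # one elementwise 2D map
    B[:] = B * 2.0  # another elementwise 2D map

sdfg = prog.to_sdfg()
state = sdfg.nodes()[0]  # assumption: both maps land in the first state

subgraph = SubgraphView(state, state.nodes())
if MultiExpansion.can_be_applied(sdfg, subgraph):
    MultiExpansion(subgraph).apply(sdfg)

# re-collect the (possibly changed) state contents before fusing
subgraph = SubgraphView(state, state.nodes())
if SubgraphFusion.can_be_applied(sdfg, subgraph):
    SubgraphFusion(subgraph).apply(sdfg)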