class Transformation(object):
    """ Base class for transformations, as well as a static registry of
        transformations, where new transformations can be added in a
        decentralized manner.

        New transformations are registered with ``Transformation.register``
        (or ``dace.registry.autoregister_params``) with two optional boolean
        keyword arguments: ``singlestate`` (default: False) and ``strict``
        (default: False).
        If ``singlestate`` is True, the transformation is matched on
        subgraphs inside an SDFGState; otherwise, subgraphs of the SDFG
        state machine are matched.
        If ``strict`` is True, this transformation will be considered strict
        (i.e., always beneficial to perform) and will be performed
        automatically as part of SDFG strict transformations.
    """

    # Properties
    sdfg_id = Property(dtype=int, category="(Debug)")
    state_id = Property(dtype=int, category="(Debug)")
    _subgraph = DictProperty(key_type=int, value_type=int, category="(Debug)")
    expr_index = Property(dtype=int, category="(Debug)")

    @staticmethod
    def annotates_memlets():
        """ Indicates whether the transformation annotates the edges it
            creates or modifies with the appropriate memlets. This determines
            whether to apply memlet propagation after the transformation.
        """
        return False

    @staticmethod
    def expressions():
        """ Returns a list of Graph objects that will be matched in the
            subgraph isomorphism phase. Used as a pre-pass before calling
            ``can_be_applied``.

            @see Transformation.can_be_applied
        """
        raise NotImplementedError

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if this transformation can be applied on the
            candidate matched subgraph.

            :param graph: SDFGState object if this transformation is
                          single-state, or SDFG object otherwise.
            :param candidate: A mapping between node IDs returned from
                              ``Transformation.expressions`` and the nodes
                              in ``graph``.
            :param expr_index: The list index from
                               ``Transformation.expressions`` that was
                               matched.
            :param sdfg: If ``graph`` is an SDFGState, its parent SDFG.
                         Otherwise, it should be equal to ``graph``.
            :param strict: Whether the transformation should run in strict
                           mode.
            :return: True if the transformation can be applied.
        """
        raise NotImplementedError

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a string representation of the pattern match on the
            candidate subgraph. Used when identifying matches in the console
            UI.
        """
        raise NotImplementedError

    def __init__(self, sdfg_id, state_id, subgraph, expr_index):
        """ Initializes an instance of Transformation.

            :param sdfg_id: A unique ID of the SDFG.
            :param state_id: The node ID of the SDFG state, if applicable.
            :param subgraph: A mapping between node IDs returned from
                             ``Transformation.expressions`` and the nodes in
                             ``graph``.
            :param expr_index: The list index from
                               ``Transformation.expressions`` that was
                               matched.
            :raise TypeError: When the transformation is not a subclass of
                              Transformation.
            :raise TypeError: When ``state_id`` is not an instance of int.
            :raise TypeError: When ``subgraph`` is not a dict of
                              dace.sdfg.nodes.Node : int.
        """
        self.sdfg_id = sdfg_id
        self.state_id = state_id
        for value in subgraph.values():
            if not isinstance(value, int):
                raise TypeError('All values of the subgraph dictionary must '
                                'be instances of int.')
        # Serializable subgraph with node IDs as keys
        expr = self.expressions()[expr_index]
        self._subgraph = {expr.node_id(k): v for k, v in subgraph.items()}
        self._subgraph_user = subgraph
        self.expr_index = expr_index

    @property
    def subgraph(self):
        return self._subgraph_user

    def __lt__(self, other):
        """ Compares two transformations by their class name and node IDs
            in the match. Used for ordering transformations consistently.
        """
        if type(self) != type(other):
            return type(self).__name__ < type(other).__name__

        self_ids = iter(self.subgraph.values())
        other_ids = iter(other.subgraph.values())

        try:
            self_id = next(self_ids)
        except StopIteration:
            return True
        try:
            other_id = next(other_ids)
        except StopIteration:
            return False

        self_end = False

        while self_id is not None and other_id is not None:
            if self_id != other_id:
                return self_id < other_id
            try:
                self_id = next(self_ids)
            except StopIteration:
                self_end = True
            try:
                other_id = next(other_ids)
            except StopIteration:
                if self_end:  # Transformations are equal
                    return False
                return False
            if self_end:
                return True

    def apply_pattern(self, sdfg):
        """ Applies this transformation on the given SDFG. """
        self.apply(sdfg)
        if not self.annotates_memlets():
            propagation.propagate_memlets_sdfg(sdfg)

    def __str__(self):
        return type(self).__name__

    def modifies_graph(self):
        return True

    def print_match(self, sdfg):
        """ Returns a string representation of the pattern match on the
            given SDFG. Used for printing matches in the console UI.
        """
        if not isinstance(sdfg, dace.SDFG):
            raise TypeError("Expected SDFG, got: {}".format(
                type(sdfg).__name__))
        if self.state_id == -1:
            graph = sdfg
        else:
            graph = sdfg.nodes()[self.state_id]
        string = type(self).__name__ + ' in '
        string += type(self).match_to_str(graph, self.subgraph)
        return string

    def to_json(self, parent=None):
        props = dace.serialize.all_properties_to_json(self)
        return {
            'type': 'Transformation',
            'transformation': type(self).__name__,
            **props
        }

    @staticmethod
    def from_json(json_obj, context=None):
        xform = next(ext for ext in Transformation.extensions().keys()
                     if ext.__name__ == json_obj['transformation'])

        # Recreate subgraph
        expr = xform.expressions()[json_obj['expr_index']]
        subgraph = {
            expr.node(int(k)): int(v)
            for k, v in json_obj['_subgraph'].items()
        }

        # Reconstruct transformation
        ret = xform(json_obj['sdfg_id'], json_obj['state_id'], subgraph,
                    json_obj['expr_index'])
        context = context or {}
        context['transformation'] = ret
        dace.serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'transformation', 'type'})
        return ret
class AccessNode(Node):
    """ A node that accesses data in the SDFG. Denoted by a circular
        shape. """

    access = Property(choices=dtypes.AccessType,
                      desc="Type of access to this array",
                      default=dtypes.AccessType.ReadWrite)
    setzero = Property(dtype=bool, desc="Initialize to zero", default=False)
    debuginfo = DebugInfoProperty()
    data = DataProperty(desc="Data (array, stream, scalar) to access")

    def __init__(self,
                 data,
                 access=dtypes.AccessType.ReadWrite,
                 debuginfo=None):
        super(AccessNode, self).__init__()

        # Properties
        self.debuginfo = debuginfo
        self.access = access
        if not isinstance(data, str):
            raise TypeError('Data for AccessNode must be a string')
        self.data = data

    @staticmethod
    def from_json(json_obj, context=None):
        ret = AccessNode("Nodata")
        dace.serialize.set_properties_from_json(ret,
                                                json_obj,
                                                context=context)
        return ret

    def __deepcopy__(self, memo):
        node = object.__new__(AccessNode)
        node._access = self._access
        node._data = self._data
        node._setzero = self._setzero
        node._in_connectors = dcpy(self._in_connectors, memo=memo)
        node._out_connectors = dcpy(self._out_connectors, memo=memo)
        node._debuginfo = dcpy(self._debuginfo, memo=memo)
        return node

    @property
    def label(self):
        return self.data

    def __label__(self, sdfg, state):
        return self.data

    def desc(self, sdfg):
        from dace.sdfg import SDFGState, ScopeSubgraphView
        if isinstance(sdfg, (SDFGState, ScopeSubgraphView)):
            sdfg = sdfg.parent
        return sdfg.arrays[self.data]

    def validate(self, sdfg, state):
        if self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    def has_writes(self, state):
        for e in state.in_edges(self):
            if not e.data.is_empty():
                return True
        return False

    def has_reads(self, state):
        for e in state.out_edges(self):
            if not e.data.is_empty():
                return True
        return False
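# Illustrative usage sketch (not part of the original code): constructing an
# access node and resolving its data descriptor. ``sdfg`` and ``state`` are
# assumed to be an existing SDFG/state pair whose arrays already contain an
# entry named 'A'; in practice, ``state.add_access('A')`` wraps this
# constructor.
def _access_node_example(sdfg, state):
    a = AccessNode('A', access=dtypes.AccessType.ReadOnly)
    state.add_node(a)
    desc = a.desc(sdfg)  # Resolves to sdfg.arrays['A']
    return desc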
class NestedSDFG(CodeNode):
    """ An SDFG state node that contains an SDFG of its own, runnable using
        the data dependencies specified via its connectors.

        It is encouraged to use nested SDFGs instead of coarse-grained
        tasklets, since they are analyzable with respect to transformations.

        @note: A nested SDFG cannot create recursion (i.e., it may not be
               one of its parent SDFGs).
    """

    # NOTE: We cannot use SDFG as the type because of an import loop
    sdfg = SDFGReferenceProperty(desc="The SDFG", allow_none=True)
    schedule = Property(dtype=dtypes.ScheduleType,
                        desc="SDFG schedule",
                        allow_none=True,
                        choices=dtypes.ScheduleType,
                        from_string=lambda x: dtypes.ScheduleType[x],
                        default=dtypes.ScheduleType.Default)
    symbol_mapping = DictProperty(
        key_type=str,
        value_type=dace.symbolic.pystr_to_symbolic,
        desc="Mapping between internal symbols and their values, expressed "
        "as symbolic expressions")
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    instrument = Property(
        choices=dtypes.InstrumentationType,
        desc="Measure execution statistics with given method",
        default=dtypes.InstrumentationType.No_Instrumentation)

    no_inline = Property(
        dtype=bool,
        desc="If True, this nested SDFG will not be inlined in strict mode "
        "(in the InlineSDFG transformation)",
        default=False)

    def __init__(self,
                 label,
                 sdfg,
                 inputs: Set[str],
                 outputs: Set[str],
                 symbol_mapping: Dict[str, Any] = None,
                 schedule=dtypes.ScheduleType.Default,
                 location=None,
                 debuginfo=None):
        from dace.sdfg import SDFG
        super(NestedSDFG, self).__init__(label, location, inputs, outputs)

        # Properties
        self.sdfg: SDFG = sdfg
        self.symbol_mapping = symbol_mapping or {}
        self.schedule = schedule
        self.debuginfo = debuginfo

    @staticmethod
    def from_json(json_obj, context=None):
        from dace import SDFG  # Avoid import loop

        # We have to load the SDFG first.
        ret = NestedSDFG("nolabel", SDFG('nosdfg'), {}, {})
        dace.serialize.set_properties_from_json(ret, json_obj, context)
        if context and 'sdfg_state' in context:
            ret.sdfg.parent = context['sdfg_state']
        if context and 'sdfg' in context:
            ret.sdfg.parent_sdfg = context['sdfg']
        ret.sdfg.parent_nsdfg_node = ret
        ret.sdfg.update_sdfg_list([])
        return ret

    @property
    def free_symbols(self) -> Set[str]:
        return set().union(
            *(map(str, pystr_to_symbolic(v).free_symbols)
              for v in self.symbol_mapping.values()),
            *(map(str, pystr_to_symbolic(v).free_symbols)
              for v in self.location.values()))

    def infer_connector_types(self, sdfg, state):
        # Avoid import loop
        from dace.sdfg.infer_types import infer_connector_types

        # Infer internal connector types
        infer_connector_types(self.sdfg)

    def __str__(self):
        if not self.label:
            return "SDFG"
        else:
            return self.label

    def validate(self, sdfg, state):
        if not dtypes.validate_name(self.label):
            raise NameError('Invalid nested SDFG name "%s"' % self.label)
        for in_conn in self.in_connectors:
            if not dtypes.validate_name(in_conn):
                raise NameError('Invalid input connector "%s"' % in_conn)
        for out_conn in self.out_connectors:
            if not dtypes.validate_name(out_conn):
                raise NameError('Invalid output connector "%s"' % out_conn)
        connectors = self.in_connectors.keys() | self.out_connectors.keys()
        for dname, desc in self.sdfg.arrays.items():
            # TODO(later): Disallow scalars without access nodes (so that
            # this check passes for them too).
            if isinstance(desc, data.Scalar):
                continue
            if not desc.transient and dname not in connectors:
                raise NameError('Data descriptor "%s" not found in nested '
                                'SDFG connectors' % dname)
            if dname in connectors and desc.transient:
                raise NameError(
                    '"%s" is a connector but its corresponding array is '
                    'transient' % dname)

        # Validate undefined symbols
        symbols = set(k for k in self.sdfg.free_symbols
                      if k not in connectors)
        missing_symbols = [
            s for s in symbols if s not in self.symbol_mapping
        ]
        if missing_symbols:
            raise ValueError('Missing symbols on nested SDFG: %s' %
                             (missing_symbols))
        extra_symbols = self.symbol_mapping.keys() - symbols
        if len(extra_symbols) > 0:
            # TODO: Elevate to an error?
            warnings.warn(
                f"{self.label} maps to unused symbol(s): {extra_symbols}")

        # Recursively validate nested SDFG
        self.sdfg.validate()
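# Illustrative sketch (not part of the original code): embedding an inner
# SDFG and mapping its free symbol ``N`` to an outer expression.
# ``outer_state`` and ``inner_sdfg`` are assumed to exist, and the connector
# names 'a_in'/'b_out' are hypothetical; ``SDFGState.add_nested_sdfg`` is
# the usual helper that constructs this node and wires its connectors.
def _nested_sdfg_example(outer_state, inner_sdfg):
    nsdfg = outer_state.add_nested_sdfg(inner_sdfg,
                                        None,
                                        inputs={'a_in'},
                                        outputs={'b_out'},
                                        symbol_mapping={'N': 'M / 2'})
    # Every free symbol of inner_sdfg must be covered by symbol_mapping,
    # otherwise validate() above raises a ValueError
    return nsdfg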
class SubgraphTransformation(TransformationBase):
    """
    Base class for transformations that apply on arbitrary subgraphs,
    rather than matching a specific pattern. Subclasses need to implement
    the ``can_be_applied`` and ``apply`` operations, as well as be
    registered with the subclass registry. See the ``Transformation`` class
    docstring for more information.
    """

    sdfg_id = Property(dtype=int, desc='ID of SDFG to transform')
    state_id = Property(
        dtype=int,
        desc='ID of state to transform subgraph within, or -1 to transform '
        'the SDFG')
    subgraph = SetProperty(element_type=int,
                           desc='Subgraph in transformation instance')

    def __init__(self,
                 subgraph: Union[Set[int], gr.SubgraphView],
                 sdfg_id: int = None,
                 state_id: int = None):
        if (not isinstance(subgraph, (gr.SubgraphView, SDFG, SDFGState))
                and (sdfg_id is None or state_id is None)):
            raise TypeError(
                'Subgraph transformation either expects a SubgraphView or '
                'a set of node IDs, SDFG ID and state ID (or -1).')

        # An entire graph is given as a subgraph
        if isinstance(subgraph, (SDFG, SDFGState)):
            subgraph = gr.SubgraphView(subgraph, subgraph.nodes())

        if isinstance(subgraph, gr.SubgraphView):
            self.subgraph = set(
                subgraph.graph.node_id(n) for n in subgraph.nodes())

            if isinstance(subgraph.graph, SDFGState):
                sdfg = subgraph.graph.parent
                self.sdfg_id = sdfg.sdfg_id
                self.state_id = sdfg.node_id(subgraph.graph)
            elif isinstance(subgraph.graph, SDFG):
                self.sdfg_id = subgraph.graph.sdfg_id
                self.state_id = -1
            else:
                raise TypeError('Unrecognized graph type "%s"' %
                                type(subgraph.graph).__name__)
        else:
            self.subgraph = subgraph
            self.sdfg_id = sdfg_id
            self.state_id = state_id

    def subgraph_view(self, sdfg: SDFG) -> gr.SubgraphView:
        graph = sdfg.sdfg_list[self.sdfg_id]
        if self.state_id != -1:
            graph = graph.node(self.state_id)
        return gr.SubgraphView(graph,
                               [graph.node(idx) for idx in self.subgraph])

    def can_be_applied(self, sdfg: SDFG, subgraph: gr.SubgraphView) -> bool:
        """
        Tries to match the transformation on a given subgraph, returning
        True if this transformation can be applied.

        :param sdfg: The SDFG that includes the subgraph.
        :param subgraph: The SDFG or state subgraph to try to apply the
                         transformation on.
        :return: True if the subgraph can be transformed, or False
                 otherwise.
        """
        pass

    def apply(self, sdfg: SDFG):
        """
        Applies the transformation on the given subgraph.

        :param sdfg: The SDFG that includes the subgraph.
        """
        pass

    @classmethod
    def apply_to(cls,
                 sdfg: SDFG,
                 *where: Union[nd.Node, SDFGState, gr.SubgraphView],
                 verify: bool = True,
                 **options: Any):
        """
        Applies this transformation to a given subgraph, defined by a set
        of nodes. Raises an error if arguments are invalid or the
        transformation is not applicable.

        To apply the transformation on a specific subgraph, the ``where``
        parameter can be used either with a subgraph object
        (``SubgraphView``) or directly with a list of subgraph nodes, given
        as ``Node`` or ``SDFGState`` objects. Transformation properties can
        then be given as keyword arguments. For example, applying
        ``SubgraphFusion`` on a subgraph of three nodes can be called in
        one of two ways:

        ```
        # Subgraph
        SubgraphFusion.apply_to(
            sdfg, SubgraphView(state, [node_a, node_b, node_c]))

        # Simplified API: list of nodes
        SubgraphFusion.apply_to(sdfg, node_a, node_b, node_c)
        ```

        :param sdfg: The SDFG to apply the transformation to.
        :param where: A set of nodes in the SDFG/state, or a subgraph
                      thereof.
        :param verify: Check that ``can_be_applied`` returns True before
                       applying.
        :param options: A set of parameters to use for applying the
                        transformation.
        """
        subgraph = None
        if len(where) == 1:
            if isinstance(where[0], (list, tuple)):
                where = where[0]
            elif isinstance(where[0], gr.SubgraphView):
                subgraph = where[0]
        if len(where) == 0:
            raise ValueError('At least one node is required')

        # Check that all positional arguments are nodes, and determine
        # whether the subgraph lies within a state or spans the SDFG
        if subgraph is None:
            sample_node = where[0]

            if isinstance(sample_node, SDFGState):
                graph = sdfg
                state_id = -1
            elif isinstance(sample_node, nd.Node):
                graph = next(s for s in sdfg.nodes()
                             if sample_node in s.nodes())
                state_id = sdfg.node_id(graph)
            else:
                raise TypeError('Invalid node type "%s"' %
                                type(sample_node).__name__)

            # Construct subgraph and instantiate transformation
            subgraph = gr.SubgraphView(graph, where)
            instance = cls(subgraph, sdfg.sdfg_id, state_id)
        else:
            # Construct instance from subgraph directly
            instance = cls(subgraph)

        # Construct transformation parameters
        for optname, optval in options.items():
            if optname not in cls.__properties__:
                raise ValueError('Property "%s" not found in '
                                 'transformation' % optname)
            setattr(instance, optname, optval)

        if verify:
            if not instance.can_be_applied(sdfg, subgraph):
                raise ValueError('Transformation cannot be applied on the '
                                 'given subgraph ("can_be_applied" failed)')

        # Apply to SDFG
        return instance.apply(sdfg)

    def to_json(self, parent=None):
        props = serialize.all_properties_to_json(self)
        return {
            'type': 'SubgraphTransformation',
            'transformation': type(self).__name__,
            **props
        }

    @staticmethod
    def from_json(json_obj: Dict[str, Any],
                  context: Dict[str,
                                Any] = None) -> 'SubgraphTransformation':
        xform = next(ext
                     for ext in SubgraphTransformation.extensions().keys()
                     if ext.__name__ == json_obj['transformation'])

        # Reconstruct transformation
        ret = xform(json_obj['subgraph'], json_obj['sdfg_id'],
                    json_obj['state_id'])
        context = context or {}
        context['transformation'] = ret
        serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'transformation', 'type'})
        return ret
class Pipeline(Map):
    """ This is a convenience subclass of Map that allows easier
        implementation of loop nests (using regular Map indices) that need
        a constant-sized initialization and drain phase (e.g., N*M + c
        iterations), which would otherwise need a flattened one-dimensional
        map.
    """
    init_size = SymbolicProperty(
        default=0, desc="Number of initialization iterations.")
    init_overlap = Property(
        dtype=bool,
        default=True,
        desc="Whether to increment regular map indices during "
        "initialization.")
    drain_size = SymbolicProperty(default=1,
                                  desc="Number of drain iterations.")
    drain_overlap = Property(
        dtype=bool,
        default=True,
        desc="Whether to increment regular map indices during pipeline "
        "drain.")
    additional_iterators = Property(
        dtype=dict,
        desc="Additional iterators, managed by the user inside the scope.")

    def __init__(self,
                 *args,
                 init_size=0,
                 init_overlap=False,
                 drain_size=0,
                 drain_overlap=False,
                 additional_iterators=None,
                 **kwargs):
        super(Pipeline, self).__init__(*args, **kwargs)
        self.init_size = init_size
        self.init_overlap = init_overlap
        self.drain_size = drain_size
        self.drain_overlap = drain_overlap
        # Avoid a shared mutable default argument
        self.additional_iterators = additional_iterators or {}

    def iterator_str(self):
        return "__" + "".join(self.params)

    def loop_bound_str(self):
        from dace.codegen.targets.common import sym2cpp
        bound = 1
        for begin, end, step in self.range:
            bound *= (step + end - begin) // step
        # Add init and drain phases when relevant
        add_str = (" + " + sym2cpp(self.init_size)
                   if self.init_size != 0 and not self.init_overlap else "")
        add_str += (" + " + sym2cpp(self.drain_size)
                    if self.drain_size != 0 and not self.drain_overlap
                    else "")
        return sym2cpp(bound) + add_str

    def init_condition(self):
        """ Variable that can be checked to see if the pipeline is
            currently in its initialization phase. """
        if self.init_size == 0:
            raise ValueError("No init condition exists for " + self.label)
        return self.iterator_str() + "_init"

    def drain_condition(self):
        """ Variable that can be checked to see if the pipeline is
            currently in its draining phase. """
        if self.drain_size == 0:
            raise ValueError("No drain condition exists for " + self.label)
        return self.iterator_str() + "_drain"
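# Worked example (not part of the original code) of the bound computed by
# ``loop_bound_str``: for a pipeline over the inclusive ranges
# (0, N-1, 1) and (0, M-1, 1) (i.e., ``0:N, 0:M``), the flattened bound is
#     ((1 + (N-1) - 0) // 1) * ((1 + (M-1) - 0) // 1) = N * M,
# and with ``init_size=c`` and ``init_overlap=False`` the generated bound
# string becomes "N*M + c", matching the "N*M + c iterations" mentioned in
# the class docstring.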
class Scalar(Data):
    """ Data descriptor of a scalar value. """

    allow_conflicts = Property(dtype=bool)

    def __init__(self,
                 dtype,
                 transient=False,
                 storage=dace.types.StorageType.Default,
                 allow_conflicts=False,
                 location='',
                 toplevel=False,
                 debuginfo=None):
        self.allow_conflicts = allow_conflicts
        shape = [1]
        super(Scalar, self).__init__(dtype, shape, transient, storage,
                                     location, toplevel, debuginfo)

    def __repr__(self):
        return 'Scalar (dtype=%s)' % self.dtype

    def clone(self):
        return Scalar(self.dtype, self.transient, self.storage,
                      self.allow_conflicts, self.location, self.toplevel,
                      self.debuginfo)

    @property
    def strides(self):
        return self.shape

    @property
    def offset(self):
        return [0]

    def is_equivalent(self, other):
        if not isinstance(other, Scalar):
            return False
        if self.dtype != other.dtype:
            return False
        return True

    def signature(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call:
            return name
        return str(self.dtype.ctype) + ' ' + name

    def sizes(self):
        return None

    def covers_range(self, rng):
        if len(rng) != 1:
            return False
        rng = rng[0]

        try:
            if (rng[1] - rng[0]) > rng[2]:
                return False
        except TypeError:
            # Cannot determine the truth value of a relational expression
            # with free symbols; assume True. If this assumption is false,
            # refine the symbol definitions in the program.
            pass

        return True
class Memlet(object):
    """ Data movement object. Represents the data, the subset moved, and
        the manner it is reindexed (``other_subset``) into the destination.
        If there are multiple conflicting writes, this object also
        specifies how they are resolved with a lambda function.
    """

    # Properties
    volume = SymbolicProperty(default=0,
                              desc='The exact number of elements moved '
                              'using this memlet, or the maximum number '
                              'if dynamic=True (with 0 as unbounded)')
    dynamic = Property(default=False,
                       desc='Is the number of elements moved determined at '
                       'runtime (e.g., data dependent)')
    subset = SubsetProperty(allow_none=True,
                            desc='Subset of elements to move from the data '
                            'attached to this edge.')
    other_subset = SubsetProperty(
        allow_none=True,
        desc='Subset of elements after reindexing to the data not attached '
        'to this edge (e.g., for offsets and reshaping).')
    data = DataProperty(desc='Data descriptor attached to this memlet')
    wcr = LambdaProperty(allow_none=True,
                         desc='If set, defines a write-conflict resolution '
                         'lambda function. The syntax of the lambda '
                         'function receives two elements: ``current`` '
                         'value and ``new`` value, and returns the value '
                         'after resolution')

    # Code generation and validation hints
    debuginfo = DebugInfoProperty(desc='Line information to track source '
                                  'and generated code')
    wcr_nonatomic = Property(dtype=bool,
                             default=False,
                             desc='If True, always generates '
                             'non-conflicting (non-atomic) writes in '
                             'resulting code')
    allow_oob = Property(dtype=bool,
                         default=False,
                         desc='Bypass out-of-bounds validation')

    def __init__(self,
                 expr: str = None,
                 data: str = None,
                 subset: Union[str, subsets.Subset] = None,
                 other_subset: Union[str, subsets.Subset] = None,
                 volume: Union[int, str, symbolic.SymbolicType] = None,
                 dynamic: bool = False,
                 wcr: Union[str, ast.AST] = None,
                 debuginfo: dtypes.DebugInfo = None,
                 wcr_nonatomic: bool = False,
                 allow_oob: bool = False):
        """
        Constructs a Memlet.

        :param expr: A string expression of this memlet, given as an
                     ease-of-use API. Must follow one of the following
                     forms:
                     1. ``ARRAY``,
                     2. ``ARRAY[SUBSET]``,
                     3. ``ARRAY[SUBSET] -> OTHER_SUBSET``.
        :param data: (DEPRECATED) Data descriptor name attached to this
                     memlet.
        :param subset: The subset to take from the data attached to the
                       edge, represented either as a string or a Subset
                       object.
        :param other_subset: The subset to offset into the other side of
                             the memlet, represented either as a string or
                             a Subset object.
        :param volume: The exact number of elements moved using this
                       memlet, or the maximum number of elements if
                       ``dynamic`` is set to True. If dynamic and this
                       value is set to zero, the number of elements moved
                       is runtime-defined and unbounded.
        :param dynamic: If True, the number of elements moved in this
                        memlet is defined dynamically at runtime.
        :param wcr: A lambda function (represented as a string or Python
                    AST) specifying how write-conflicts are resolved. The
                    syntax of the lambda function receives two elements:
                    ``current`` value and ``new`` value, and returns the
                    value after resolution. For example, summation is
                    represented by ``'lambda cur, new: cur + new'``.
        :param debuginfo: Line information from the generating source code.
        :param wcr_nonatomic: If True, overrides the automatic code
                              generator decision and treats all
                              write-conflict resolution operations as
                              non-atomic, which might cause race conditions
                              in the general case.
        :param allow_oob: If True, bypasses the checks in SDFG validation
                          for out-of-bounds accesses in memlet subsets.
        """
        # Will be set once memlet is added into an SDFG (in try_initialize)
        self._sdfg = None
        self._state = None
        self._edge = None

        # Field caching which subset belongs to source or destination of
        # memlet
        self._is_data_src = None

        # Initialize first by string expression
        self.data = None
        self.subset = None
        self.other_subset = None
        if expr is not None:
            self._parse_memlet_from_str(expr)

        # Set properties
        self.data = self.data or data
        self.subset = self.subset or subset
        self.other_subset = self.other_subset or other_subset

        if volume is not None:
            self.volume = volume
        else:
            if self.subset is not None:
                self.volume = self.subset.num_elements()
            elif self.other_subset is not None:
                self.volume = self.other_subset.num_elements()
            else:
                self.volume = 1

        self.dynamic = dynamic
        self.wcr = wcr
        self.wcr_nonatomic = wcr_nonatomic
        self.debuginfo = debuginfo
        self.allow_oob = allow_oob

    def to_json(self):
        attrs = dace.serialize.all_properties_to_json(self)

        # Fill in new values
        if self.src_subset is not None:
            attrs['src_subset'] = self.src_subset.to_json()
        else:
            attrs['src_subset'] = None
        if self.dst_subset is not None:
            attrs['dst_subset'] = self.dst_subset.to_json()
        else:
            attrs['dst_subset'] = None

        # Fill in legacy (DEPRECATED) values for backwards compatibility
        attrs['num_accesses'] = \
            str(self.volume) if not self.dynamic else -1

        return {"type": "Memlet", "attributes": attrs}

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Memlet()
        dace.serialize.set_properties_from_json(
            ret,
            json_obj,
            context=context,
            ignore_properties={'src_subset', 'dst_subset', 'num_accesses'})
        if context:
            ret._sdfg = context['sdfg']
            ret._state = context['sdfg_state']
        return ret

    def __deepcopy__(self, memo):
        node = object.__new__(Memlet)

        # Set properties
        node.volume = dcpy(self.volume, memo=memo)
        node._dynamic = self._dynamic
        node.subset = dcpy(self.subset, memo=memo)
        node.other_subset = dcpy(self.other_subset, memo=memo)
        node.data = dcpy(self.data, memo=memo)
        node.wcr = dcpy(self.wcr, memo=memo)
        node.debuginfo = dcpy(self.debuginfo, memo=memo)
        node._wcr_nonatomic = self._wcr_nonatomic
        node._allow_oob = self._allow_oob
        node._is_data_src = self._is_data_src

        # Nullify graph references
        node._sdfg = None
        node._state = None
        node._edge = None

        return node

    def is_empty(self) -> bool:
        """
        Returns True if this memlet carries no data. Memlets without data
        are primarily used for connecting nodes to scopes without
        transferring data to them.
        """
        return (self.data is None and self.src_subset is None
                and self.dst_subset is None)

    @property
    def num_accesses(self):
        """
        Returns the total memory movement volume (in elements) of this
        memlet.
        """
        return self.volume

    @num_accesses.setter
    def num_accesses(self, value):
        self.volume = value

    @staticmethod
    def simple(data,
               subset_str,
               wcr_str=None,
               other_subset_str=None,
               wcr_conflict=True,
               num_accesses=None,
               debuginfo=None,
               dynamic=False):
        """
        DEPRECATED: Constructs a Memlet from string-based expressions.

        :param data: The data object or name to access.
        :type data: Either a string of the data descriptor name or an
                    AccessNode.
        :param subset_str: The subset of ``data`` that is going to be
                           accessed in string format. Example: '0:N'.
        :param wcr_str: A lambda function (as a string) specifying how
                        write-conflicts are resolved. The syntax of the
                        lambda function receives two elements: ``current``
                        value and ``new`` value, and returns the value
                        after resolution. For example, summation is
                        ``'lambda cur, new: cur + new'``.
        :param other_subset_str: The reindexing of ``subset`` on the other
                                 connected data (as a string).
        :param wcr_conflict: If False, forces non-locked conflict
                             resolution when generating code. The default
                             is to let the code generator infer this
                             information from the SDFG.
        :param num_accesses: The number of times that the moved data will
                             be subsequently accessed. If -1, designates
                             that the number of accesses is unknown at
                             compile time.
        :param debuginfo: Source-code information (e.g., line, file) used
                          for debugging.
        :param dynamic: If True, the number of elements moved in this
                        memlet is defined dynamically at runtime.
        """
        # warnings.warn(
        #     'This function is deprecated, please use the Memlet '
        #     'constructor instead', DeprecationWarning)

        result = Memlet()

        if isinstance(subset_str, subsets.Subset):
            result.subset = subset_str
        else:
            result.subset = SubsetProperty.from_string(subset_str)

        result.dynamic = dynamic

        if num_accesses is not None:
            if num_accesses == -1:
                result.dynamic = True
                result.volume = 0
            else:
                result.volume = num_accesses
        else:
            result.volume = result.subset.num_elements()

        if wcr_str is not None:
            if isinstance(wcr_str, ast.AST):
                result.wcr = wcr_str
            else:
                result.wcr = LambdaProperty.from_string(wcr_str)

        if other_subset_str is not None:
            if isinstance(other_subset_str, subsets.Subset):
                result.other_subset = other_subset_str
            else:
                result.other_subset = SubsetProperty.from_string(
                    other_subset_str)
        else:
            result.other_subset = None

        # If it is an access node or another memlet
        if hasattr(data, 'data'):
            result.data = data.data
        else:
            result.data = data

        result.wcr_nonatomic = not wcr_conflict

        return result

    def _parse_from_subexpr(self, expr: str):
        if expr[-1] != ']':  # No subset given, try to use whole array
            if not dtypes.validate_name(expr):
                raise SyntaxError('Invalid memlet syntax "%s"' % expr)
            return expr, None

        # array[subset] syntax
        arrname, subset_str = expr[:-1].split('[')
        if not dtypes.validate_name(arrname):
            raise SyntaxError('Invalid array name "%s" in memlet' % arrname)
        return arrname, SubsetProperty.from_string(subset_str)

    def _parse_memlet_from_str(self, expr: str):
        """
        Parses a memlet and fills in either the src_subset,dst_subset
        fields or the _data,_subset fields.

        :param expr: A string expression of this memlet, given as an
                     ease-of-use API. Must follow one of the following
                     forms:
                     1. ``ARRAY``,
                     2. ``ARRAY[SUBSET]``,
                     3. ``ARRAY[SUBSET] -> OTHER_SUBSET``.
                     Note that modes 2 and 3 are deprecated and will leave
                     the memlet uninitialized until inserted into an SDFG.
        """
        expr = expr.strip()
        if '->' not in expr:  # Options 1 and 2
            self.data, self.subset = self._parse_from_subexpr(expr)
            return

        # Option 3
        src_expr, dst_expr = expr.split('->')
        src_expr = src_expr.strip()
        dst_expr = dst_expr.strip()
        if '[' not in src_expr and not dtypes.validate_name(src_expr):
            raise SyntaxError('Expression without data name not yet '
                              'allowed')

        self.data, self.subset = self._parse_from_subexpr(src_expr)
        self.other_subset = SubsetProperty.from_string(dst_expr)

    def try_initialize(self, sdfg: 'dace.sdfg.SDFG',
                       state: 'dace.sdfg.SDFGState',
                       edge: 'dace.sdfg.graph.MultiConnectorEdge'):
        """
        Tries to initialize the internal fields of the memlet (e.g.,
        src/dst subset) once it is added to an SDFG as an edge.
        """
        from dace.sdfg.nodes import AccessNode, CodeNode  # Avoid import loop
        self._sdfg = sdfg
        self._state = state
        self._edge = edge

        # If memlet is code->code, ensure volume=1
        if (isinstance(edge.src, CodeNode)
                and isinstance(edge.dst, CodeNode) and self.volume == 0):
            self.volume = 1

        # Find source/destination of memlet
        try:
            path = state.memlet_path(edge)
        except (ValueError, AssertionError, StopIteration):
            # Cannot initialize yet
            return

        is_data_src = True
        if isinstance(path[-1].dst, AccessNode):
            if path[-1].dst.data == self._data:
                is_data_src = False
        self._is_data_src = is_data_src

        # If subset is None, fill in with entire array
        if (self.data is not None and self.subset is None):
            self.subset = subsets.Range.from_array(sdfg.arrays[self.data])

    @staticmethod
    def from_array(dataname, datadesc, wcr=None):
        """
        Constructs a Memlet that transfers an entire array's contents.

        :param dataname: The name of the data descriptor in the SDFG.
        :param datadesc: The data descriptor object.
        :param wcr: The conflict resolution lambda.
        :type datadesc: Data
        """
        rng = subsets.Range.from_array(datadesc)
        return Memlet.simple(dataname, rng, wcr_str=wcr)

    def __hash__(self):
        return hash(
            (self.volume, self.src_subset, self.dst_subset, str(self.wcr)))

    def __eq__(self, other):
        return all([
            self.volume == other.volume,
            self.src_subset == other.src_subset,
            self.dst_subset == other.dst_subset, self.wcr == other.wcr
        ])

    def replace(self, repl_dict):
        """
        Substitutes a given set of symbols with a different set of symbols.

        :param repl_dict: A dict of string symbol names to symbols with
                          which to replace them.
        """
        repl_to_intermediate = {}
        repl_to_final = {}
        for symbol in repl_dict:
            if str(symbol) != str(repl_dict[symbol]):
                intermediate = symbolic.symbol('__dacesym_' + str(symbol))
                repl_to_intermediate[symbolic.symbol(symbol)] = intermediate
                repl_to_final[intermediate] = repl_dict[symbol]

        if len(repl_to_intermediate) > 0:
            if self.volume is not None and symbolic.issymbolic(self.volume):
                self.volume = self.volume.subs(repl_to_intermediate)
                self.volume = self.volume.subs(repl_to_final)
            if self.subset is not None:
                self.subset.replace(repl_to_intermediate)
                self.subset.replace(repl_to_final)
            if self.other_subset is not None:
                self.other_subset.replace(repl_to_intermediate)
                self.other_subset.replace(repl_to_final)

    def num_elements(self):
        """ Returns the number of elements in the Memlet subset. """
        if self.subset:
            return self.subset.num_elements()
        elif self.other_subset:
            return self.other_subset.num_elements()
        return 0

    def bounding_box_size(self):
        """ Returns a per-dimension upper bound on the maximum number of
            elements in each dimension. This bound will be tight in the
            case of Range.
        """
        if self.src_subset:
            return self.src_subset.bounding_box_size()
        elif self.dst_subset:
            return self.dst_subset.bounding_box_size()
        return []

    # New fields
    @property
    def src_subset(self):
        if self._is_data_src is not None:
            return self.subset if self._is_data_src else self.other_subset
        return self.subset

    @src_subset.setter
    def src_subset(self, new_src_subset):
        if self._is_data_src is not None:
            if self._is_data_src:
                self.subset = new_src_subset
            else:
                self.other_subset = new_src_subset
        else:
            self.subset = new_src_subset

    @property
    def dst_subset(self):
        if self._is_data_src is not None:
            return self.other_subset if self._is_data_src else self.subset
        return self.other_subset

    @dst_subset.setter
    def dst_subset(self, new_dst_subset):
        if self._is_data_src is not None:
            if self._is_data_src:
                self.other_subset = new_dst_subset
            else:
                self.subset = new_dst_subset
        else:
            self.other_subset = new_dst_subset

    def validate(self, sdfg, state):
        if self.data is not None and self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

    @property
    def free_symbols(self) -> Set[str]:
        """ Returns a set of symbols used in this edge's properties. """
        # Symbolic properties are in volume, and the two subsets
        result = set()
        result |= set(map(str, self.volume.free_symbols))
        if self.src_subset:
            result |= self.src_subset.free_symbols
        if self.dst_subset:
            result |= self.dst_subset.free_symbols
        return result

    def __label__(self, sdfg, state):
        """ Returns a string representation of the memlet for display in a
            graph.

            :param sdfg: The SDFG in which the memlet resides.
            :param state: An SDFGState object in which the memlet resides.
        """
        if self.data is None:
            return self._label(None)
        return self._label(sdfg.arrays[self.data].shape)

    def __str__(self):
        return self._label(None)

    def _label(self, shape):
        result = ''
        if self.data is not None:
            result = self.data

        if self.subset is None:
            return result

        num_elements = self.subset.num_elements()
        if self.dynamic:
            result += '(dyn) '
        elif self.volume != num_elements:
            result += '(%s) ' % SymbolicProperty.to_string(self.volume)
        arrayNotation = True
        try:
            if shape is not None and reduce(operator.mul, shape, 1) == 1:
                # Don't mention the array if we're accessing a single
                # element and it's zero
                if all(s == 0 for s in self.subset.min_element()):
                    arrayNotation = False
        except TypeError:
            # Will fail if trying to check the truth value of a sympy expr
            pass
        if arrayNotation:
            result += '[%s]' % str(self.subset)
        if self.wcr is not None and str(self.wcr) != '':
            # Autodetect reduction type
            redtype = detect_reduction_type(self.wcr)
            if redtype == dtypes.ReductionType.Custom:
                wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
            else:
                wcrstr = str(redtype)
                wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

            result += ' (CR: %s)' % wcrstr

        if self.other_subset is not None:
            result += ' -> [%s]' % str(self.other_subset)
        return result

    def __repr__(self):
        return "Memlet (" + self.__str__() + ")"
class GPUTransformSDFG(transformation.Transformation):
    """ Implements the GPUTransformSDFG transformation.

        Transforms a whole SDFG to run on the GPU. Steps of the full GPU
        transform:
          0. Acquire metadata about SDFG and arrays
          1. Replace all non-transients with their GPU counterparts
          2. Copy-in state from host to GPU
          3. Copy-out state from GPU to host
          4. Re-store Default-top/CPU_Heap transients as GPU_Global
          5. Global tasklets are wrapped with a map of size 1
          6. Global Maps are re-scheduled to use the GPU
          7. Make data ready for interstate edges that use them
          8. Re-apply strict transformations to get rid of extra states and
             transients
    """

    toplevel_trans = Property(desc="Make all GPU transients top-level",
                              dtype=bool,
                              default=True)

    register_trans = Property(
        desc="Make all transients inside GPU maps registers",
        dtype=bool,
        default=True)

    sequential_innermaps = Property(desc="Make all internal maps Sequential",
                                    dtype=bool,
                                    default=True)

    skip_scalar_tasklets = Property(
        desc="If True, does not transform tasklets that manipulate "
        "(Default-stored) scalars",
        dtype=bool,
        default=True)

    strict_transform = Property(
        desc='Reapply strict transformations after modifying graph',
        dtype=bool,
        default=True)

    exclude_copyin = Property(
        desc="Exclude these arrays from being copied into the device "
        "(comma-separated)",
        dtype=str,
        default='')

    exclude_tasklets = Property(
        desc="Exclude these tasklets from being processed as CPU tasklets "
        "(comma-separated)",
        dtype=str,
        default='')

    exclude_copyout = Property(
        desc="Exclude these arrays from being copied out of the device "
        "(comma-separated)",
        dtype=str,
        default='')

    @staticmethod
    def annotates_memlets():
        # Skip memlet propagation for now
        return True

    @staticmethod
    def expressions():
        # Matches anything
        return [sd.SDFG('_')]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        for node, _ in sdfg.all_nodes_recursive():
            # Consume scopes are currently unsupported
            if isinstance(node, (nodes.ConsumeEntry, nodes.ConsumeExit)):
                return False

        for state in sdfg.nodes():
            schildren = state.scope_children()
            for node in schildren[None]:
                # If two top-level tasklets are connected with a code->code
                # memlet, they will transform into an invalid SDFG
                if (isinstance(node, nodes.CodeNode) and any(
                        isinstance(e.dst, nodes.CodeNode)
                        for e in state.out_edges(node))):
                    return False
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return graph.label

    def apply(self, sdfg: sd.SDFG):
        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        input_nodes = []
        output_nodes = []
        global_code_nodes: Dict[sd.SDFGState,
                                List[nodes.Tasklet]] = defaultdict(list)

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(sdfg).transient):
                    if (state.out_degree(node) > 0
                            and node.data not in input_nodes):
                        # Special case: nodes that lead to top-level dynamic
                        # map ranges must stay on host
                        for e in state.out_edges(node):
                            last_edge = state.memlet_path(e)[-1]
                            if (isinstance(last_edge.dst, nodes.EntryNode)
                                    and last_edge.dst_conn and
                                    not last_edge.dst_conn.startswith('IN_')
                                    and sdict[last_edge.dst] is None):
                                break
                        else:
                            input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_nodes):
                        output_nodes.append((node.data, node.desc(sdfg)))

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None:
                    if (e.data.data not in input_nodes
                            and not sdfg.arrays[e.data.data].transient):
                        input_nodes.append(
                            (e.data.data, sdfg.arrays[e.data.data]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in set(input_nodes):
            if isinstance(inode, data.Scalar):  # Scalars can remain on host
                continue
            if inode.storage == dtypes.StorageType.GPU_Global:
                continue
            newdesc = inode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + inodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[inodename] = name

        for onodename, onode in set(output_nodes):
            if onodename in cloned_arrays:
                continue
            if onode.storage == dtypes.StorageType.GPU_Global:
                continue
            newdesc = onode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            name = sdfg.add_datadesc('gpu_' + onodename,
                                     newdesc,
                                     find_new_name=True)
            cloned_arrays[onodename] = name

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state

        excluded_copyin = self.exclude_copyin.split(',')

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, sd.InterstateEdge())

        for nname, desc in dtypes.deduplicate(input_nodes):
            if nname in excluded_copyin or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data,
                                         src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state

        excluded_copyout = self.exclude_copyout.split(',')

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, sd.InterstateEdge())

        for nname, desc in dtypes.deduplicate(output_nodes):
            if nname in excluded_copyout or nname not in cloned_arrays:
                continue
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data,
                                         dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        const_syms = xfh.constant_symbols(sdfg)

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.desc(sdfg).transient):
                    nodedesc = node.desc(sdfg)

                    # Special case: nodes that lead to dynamic map ranges
                    # must stay on host
                    if any(
                            isinstance(
                                state.memlet_path(e)[-1].dst,
                                nodes.EntryNode)
                            for e in state.out_edges(node)):
                        continue

                    gpu_storage = [
                        dtypes.StorageType.GPU_Global,
                        dtypes.StorageType.GPU_Shared,
                        dtypes.StorageType.CPU_Pinned
                    ]
                    if (sdict[node] is None
                            and nodedesc.storage not in gpu_storage):
                        # NOTE: the cloned arrays match too but it's the
                        # same storage so we don't care
                        nodedesc.storage = dtypes.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        dsyms = set(map(str, nodedesc.free_symbols))
                        if (self.toplevel_trans
                                and not isinstance(
                                    nodedesc, (data.Stream, data.View))
                                and len(dsyms - const_syms) == 0):
                            nodedesc.lifetime = \
                                dtypes.AllocationLifetime.SDFG
                    elif nodedesc.storage not in gpu_storage:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = dtypes.StorageType.Register

        #######################################################
        # Step 5: Change all top-level maps and library nodes to GPU
        # schedule

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if sdict[node] is None:
                    if isinstance(node,
                                  (nodes.LibraryNode, nodes.NestedSDFG)):
                        node.schedule = dtypes.ScheduleType.GPU_Default
                    elif isinstance(node, nodes.EntryNode):
                        node.schedule = dtypes.ScheduleType.GPU_Device
                elif self.sequential_innermaps:
                    if isinstance(node,
                                  (nodes.EntryNode, nodes.LibraryNode)):
                        node.schedule = dtypes.ScheduleType.Sequential
                    elif isinstance(node, nodes.NestedSDFG):
                        for nnode, _ in node.sdfg.all_nodes_recursive():
                            if isinstance(nnode, (nodes.EntryNode,
                                                  nodes.LibraryNode)):
                                nnode.schedule = \
                                    dtypes.ScheduleType.Sequential

        #######################################################
        # Step 6: Wrap free tasklets and nested SDFGs with a GPU map

        # Collect free tasklets
        for node, state in sdfg.all_nodes_recursive():
            if isinstance(node, nodes.Tasklet):
                if (state.entry_node(node) is None
                        and not scope.is_devicelevel_gpu(
                            state.parent,
                            state,
                            node,
                            with_gpu_default=True)):
                    global_code_nodes[state].append(node)

        for state, gcodes in global_code_nodes.items():
            for gcode in gcodes:
                if gcode.label in self.exclude_tasklets.split(','):
                    continue
                # Create map and connectors
                me, mx = state.add_map(
                    gcode.label + '_gmap',
                    {gcode.label + '__gmapi': '0:1'},
                    schedule=dtypes.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get
                # corrupted when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = {('IN_' + e.dst_conn): None
                                    for e in in_edges}
                me.out_connectors = {('OUT_' + e.dst_conn): None
                                     for e in in_edges}
                mx.in_connectors = {('IN_' + e.src_conn): None
                                    for e in out_edges}
                mx.out_connectors = {('OUT_' + e.src_conn): None
                                     for e in out_edges}

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me,
                                   'IN_' + e.dst_conn, e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst,
                                   e.dst_conn, e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx,
                                   'IN_' + e.src_conn, e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst,
                                   e.dst_conn, e.data)

                # Map without inputs
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.Memlet())

        #######################################################
        # Step 7: Introduce copy-out if data used in outgoing interstate
        # edges

        for state in list(sdfg.nodes()):
            arrays_used = set()
            for e in sdfg.out_edges(state):
                # Used arrays = intersection between symbols and cloned
                # arrays
                arrays_used.update(
                    set(e.data.free_symbols) & set(cloned_arrays.keys()))

            # Create a state and copy out used arrays
            if len(arrays_used) > 0:
                co_state = sdfg.add_state(state.label + '_icopyout')

                # Reconnect outgoing edges to after interim copyout state
                for e in sdfg.out_edges(state):
                    sdutil.change_edge_src(sdfg, state, co_state)
                # Add unconditional edge to interim state
                sdfg.add_edge(state, co_state, sd.InterstateEdge())

                # Add copy-out nodes
                for nname in arrays_used:
                    desc = sdfg.arrays[nname]
                    src_array = nodes.AccessNode(cloned_arrays[nname],
                                                 debuginfo=desc.debuginfo)
                    dst_array = nodes.AccessNode(nname,
                                                 debuginfo=desc.debuginfo)
                    co_state.add_node(src_array)
                    co_state.add_node(dst_array)
                    co_state.add_nedge(
                        src_array, dst_array,
                        memlet.Memlet.from_array(dst_array.data,
                                                 dst_array.desc(sdfg)))

        #######################################################
        # Step 8: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        sdfg.apply_strict_transformations()
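# Illustrative usage sketch (not part of the original code): applying the
# transformation SDFG-wide. ``apply_transformations`` matches the pattern
# returned by ``expressions`` above; the option values shown (including the
# array name 'dbg') are hypothetical.
def _gpu_transform_example(sdfg: sd.SDFG):
    sdfg.apply_transformations(GPUTransformSDFG,
                               options={
                                   'sequential_innermaps': False,
                                   'exclude_copyin': 'dbg'
                               })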
class Reduce(dace.sdfg.nodes.LibraryNode):
    """ An SDFG node that reduces an N-dimensional array to an
        (N-k)-dimensional array, with a list of axes to reduce and a
        reduction binary function.
    """

    # Global properties
    implementations = {
        'pure': ExpandReducePure,
        'OpenMP': ExpandReduceOpenMP,
        'CUDA (device)': ExpandReduceCUDADevice,
        'CUDA (block)': ExpandReduceCUDABlock,
        'CUDA (block allreduce)': ExpandReduceCUDABlockAll,
        'FPGAPartialReduction': ExpandReduceFPGAPartialReduction
        # 'CUDA (warp)': ExpandReduceCUDAWarp,
        # 'CUDA (warp allreduce)': ExpandReduceCUDAWarpAll
    }
    default_implementation = 'pure'

    # Properties
    axes = ListProperty(element_type=int, allow_none=True)
    wcr = LambdaProperty(default='lambda a, b: a')
    identity = Property(allow_none=True)

    def __init__(self,
                 wcr='lambda a, b: a',
                 axes=None,
                 identity=None,
                 schedule=dtypes.ScheduleType.Default,
                 debuginfo=None,
                 **kwargs):
        super().__init__(name='Reduce', **kwargs)
        self.wcr = wcr
        self.axes = axes
        self.identity = identity
        self.debuginfo = debuginfo
        self.schedule = schedule

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Reduce("lambda a, b: a", None)
        dace.serialize.set_properties_from_json(ret,
                                                json_obj,
                                                context=context)
        return ret

    def __str__(self):
        # Autodetect reduction type
        redtype = detect_reduction_type(self.wcr)
        if redtype == dtypes.ReductionType.Custom:
            wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
        else:
            wcrstr = str(redtype)
            wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

        return 'Reduce ({op}), Axes: {axes}'.format(
            axes=('all' if self.axes is None else str(self.axes)),
            op=wcrstr)

    def __label__(self, sdfg, state):
        return str(self).replace(' Axes', '\nAxes')

    def validate(self, sdfg, state):
        if len(state.in_edges(self)) != 1:
            raise ValueError('Reduce node must have one input')
        if len(state.out_edges(self)) != 1:
            raise ValueError('Reduce node must have one output')
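# Illustrative sketch (not part of the original code): a sum reduction over
# the first axis. ``state``, the access nodes ``a`` and ``out``, and the
# array names/shapes in the memlets are hypothetical; the single input and
# output edges follow the contract checked in ``validate`` above.
def _reduce_example(state, a, out):
    rednode = Reduce(wcr='lambda x, y: x + y', axes=[0], identity=0)
    state.add_node(rednode)
    state.add_nedge(a, rednode, dace.Memlet('A[0:N, 0:M]'))
    state.add_nedge(rednode, out, dace.Memlet('B[0:M]'))
    return rednode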
class GPUTransformState(pattern_matching.Transformation):
    """ Implements the GPUTransformState transformation.

        Transforms a whole SDFG to run on the GPU. Steps of the full GPU
        transform:
          0. Acquire metadata about SDFG and arrays
          1. Replace all non-transients with their GPU counterparts
          2. Copy-in state from host to GPU
          3. Copy-out state from GPU to host
          4. Re-store Default-top/CPU_Heap transients as GPU_Global
          5. Global tasklets are wrapped with a map of size 1
          6. Global Maps are re-scheduled to use the GPU
          7. Re-apply strict transformations to get rid of extra states and
             transients
    """

    toplevel_trans = Property(desc="Make all GPU transients top-level",
                              dtype=bool,
                              default=True)

    register_trans = Property(
        desc="Make all transients inside GPU maps registers",
        dtype=bool,
        default=True)

    sequential_innermaps = Property(desc="Make all internal maps Sequential",
                                    dtype=bool,
                                    default=True)

    strict_transform = Property(
        desc='Reapply strict transformations after modifying graph',
        dtype=bool,
        default=True)

    @staticmethod
    def annotates_memlets():
        # Skip memlet propagation for now
        return True

    @staticmethod
    def expressions():
        # Matches anything
        return [sd.SDFG('_')]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return graph.label

    def modifies_graph(self):
        return True

    def apply(self, sdfg: sd.SDFG):
        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        input_nodes = []
        output_nodes = []
        global_code_nodes = [[] for _ in sdfg.nodes()]

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(sdfg).transient):
                    if (state.out_degree(node) > 0
                            and node.data not in input_nodes):
                        input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_nodes):
                        output_nodes.append((node.data, node.desc(sdfg)))
                elif (isinstance(node, nodes.CodeNode)
                      and sdict[node] is None):
                    if not isinstance(node, nodes.EmptyTasklet):
                        global_code_nodes[i].append(node)

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None and e.data.wcr_identity is None:
                    if (e.data.data not in input_nodes
                            and not sdfg.arrays[e.data.data].transient):
                        input_nodes.append(
                            (e.data.data, sdfg.arrays[e.data.data]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in input_nodes:
            newdesc = inode.clone()
            newdesc.storage = types.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + inodename, newdesc)
            cloned_arrays[inodename] = 'gpu_' + inodename
        for onodename, onode in output_nodes:
            if onodename in cloned_arrays:
                continue
            newdesc = onode.clone()
            newdesc.storage = types.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + onodename, newdesc)
            cloned_arrays[onodename] = 'gpu_' + onodename

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, ed.InterstateEdge())

        for nname, desc in input_nodes:
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data,
                                         src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, ed.InterstateEdge())

        for nname, desc in output_nodes:
            src_array = nodes.AccessNode(cloned_arrays[nname],
                                         debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data,
                                         dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.desc(sdfg).transient):
                    nodedesc = node.desc(sdfg)
                    if sdict[node] is None:
                        # NOTE: the cloned arrays match too but it's the
                        # same storage so we don't care
                        nodedesc.storage = types.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        if self.toplevel_trans:
                            nodedesc.toplevel = True
                    else:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = types.StorageType.Register

        #######################################################
        # Step 5: Wrap free tasklets and nested SDFGs with a GPU map

        for state, gcodes in zip(sdfg.nodes(), global_code_nodes):
            for gcode in gcodes:
                # Create map and connectors
                me, mx = state.add_map(
                    gcode.label + '_gmap',
                    {gcode.label + '__gmapi': '0:1'},
                    schedule=types.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get
                # corrupted when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = set('IN_' + e.dst_conn
                                       for e in in_edges)
                me.out_connectors = set('OUT_' + e.dst_conn
                                        for e in in_edges)
                mx.in_connectors = set('IN_' + e.src_conn
                                       for e in out_edges)
                mx.out_connectors = set('OUT_' + e.src_conn
                                        for e in out_edges)

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me,
                                   'IN_' + e.dst_conn, e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst,
                                   e.dst_conn, e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx,
                                   'IN_' + e.src_conn, e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst,
                                   e.dst_conn, e.data)

                # Map without inputs
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.EmptyMemlet())

        #######################################################
        # Step 6: Change all top-level maps to GPU maps

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node, nodes.EntryNode):
                    if sdict[node] is None:
                        node.schedule = types.ScheduleType.GPU_Device
                    elif self.sequential_innermaps:
                        node.schedule = types.ScheduleType.Sequential

        #######################################################
        # Step 7: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        opt = optimizer.SDFGOptimizer(sdfg, inplace=True)
        fusions = 0
        arrays = 0
        options = [
            match for match in opt.get_pattern_matches(strict=True)
            if isinstance(match, (StateFusion, RedundantArray))
        ]
        while options:
            ssdfg = sdfg.sdfg_list[options[0].sdfg_id]
            options[0].apply(ssdfg)
            ssdfg.validate()
            if isinstance(options[0], StateFusion):
                fusions += 1
            if isinstance(options[0], RedundantArray):
                arrays += 1

            options = [
                match for match in opt.get_pattern_matches(strict=True)
                if isinstance(match, (StateFusion, RedundantArray))
            ]

        if Config.get_bool('debugprint') and (fusions > 0 or arrays > 0):
            print('Automatically applied {} strict state fusions and '
                  'removed {} redundant arrays.'.format(fusions, arrays))
class MapTiling(transformation.Transformation):
    """ Implements the orthogonal tiling transformation.

        Orthogonal tiling is a type of nested map fission that creates tiles
        in every dimension of the matched Map.
    """

    _map_entry = nodes.MapEntry(nodes.Map("", [], []))

    # Properties
    prefix = Property(dtype=str,
                      default="tile",
                      desc="Prefix for new range symbols")
    tile_sizes = ShapeProperty(dtype=tuple,
                               default=(128, 128, 128),
                               desc="Tile size per dimension")
    strides = ShapeProperty(
        dtype=tuple,
        default=tuple(),
        desc="Tile stride (enables overlapping tiles). If empty, matches tile")
    tile_offset = ShapeProperty(dtype=tuple,
                                default=None,
                                desc="Negative stride offset per dimension",
                                allow_none=True)
    divides_evenly = Property(dtype=bool,
                              default=False,
                              desc="Tile size divides dimension length evenly")
    tile_trivial = Property(dtype=bool,
                            default=False,
                            desc="Tiles even if tile_size is 1")

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        return [sdutil.node_path_graph(MapTiling._map_entry)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        map_entry = graph.nodes()[candidate[MapTiling._map_entry]]
        return map_entry.map.label + ': ' + str(map_entry.map.params)

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]

        tile_strides = self.tile_sizes
        if self.strides is not None and len(self.strides) == len(tile_strides):
            tile_strides = self.strides

        # Retrieve map entry and exit nodes.
        map_entry = graph.nodes()[self.subgraph[MapTiling._map_entry]]
        from dace.transformation.dataflow.map_collapse import MapCollapse
        from dace.transformation.dataflow.strip_mining import StripMining
        stripmine_subgraph = {
            StripMining._map_entry: self.subgraph[MapTiling._map_entry]
        }
        sdfg_id = sdfg.sdfg_id
        last_map_entry = None
        removed_maps = 0

        original_schedule = map_entry.schedule

        for dim_idx in range(len(map_entry.map.params)):
            if dim_idx >= len(self.tile_sizes):
                tile_size = symbolic.pystr_to_symbolic(self.tile_sizes[-1])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[-1])
            else:
                tile_size = symbolic.pystr_to_symbolic(
                    self.tile_sizes[dim_idx])
                tile_stride = symbolic.pystr_to_symbolic(tile_strides[dim_idx])

            # Handle offsets
            if self.tile_offset and dim_idx >= len(self.tile_offset):
                offset = self.tile_offset[-1]
            elif self.tile_offset:
                offset = self.tile_offset[dim_idx]
            else:
                offset = 0

            dim_idx -= removed_maps
            # If tile size is trivial, skip strip-mining map dimension
            if tile_size == map_entry.map.range.size()[dim_idx]:
                continue

            stripmine = StripMining(sdfg_id, self.state_id, stripmine_subgraph,
                                    self.expr_index)

            # Special case: a tile size of 1 should be omitted from inner map
            if tile_size == 1 and tile_stride == 1 and not self.tile_trivial:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = ''
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = True
                stripmine.tile_offset = str(offset)
                stripmine.apply(sdfg)
                removed_maps += 1
            else:
                stripmine.dim_idx = dim_idx
                stripmine.new_dim_prefix = self.prefix
                stripmine.tile_size = str(tile_size)
                stripmine.tile_stride = str(tile_stride)
                stripmine.divides_evenly = self.divides_evenly
                stripmine.tile_offset = str(offset)
                stripmine.apply(sdfg)

            # Apply the original map's schedule to the new map
            map_entry.schedule = original_schedule

            if last_map_entry:
                new_map_entry = graph.in_edges(map_entry)[0].src
                mapcollapse_subgraph = {
                    MapCollapse._outer_map_entry:
                    graph.node_id(last_map_entry),
MapCollapse._inner_map_entry: graph.node_id(new_map_entry) } mapcollapse = MapCollapse(sdfg_id, self.state_id, mapcollapse_subgraph, 0) mapcollapse.apply(sdfg) last_map_entry = graph.in_edges(map_entry)[0].src
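# Usage sketch (illustrative): tiling the maps of an existing SDFG. The
# `options` dictionary sets the properties defined above by name; the import
# path and the `options` keyword are assumptions for this DaCe version.
def _example_map_tiling(sdfg):
    from dace.transformation.dataflow import MapTiling
    # Strip-mines each matched map into 32x32 tiles; `prefix` names the new
    # tiling symbols (e.g., tile_i, tile_j).
    sdfg.apply_transformations(MapTiling,
                               options={'tile_sizes': (32, 32),
                                        'prefix': 'tile'})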
class Transformation(object): """ Base class for transformations, as well as a static registry of transformations, where new transformations can be added in a decentralized manner. """ #################################################################### # Transformation registry # Class attributes _patterns = set() _stateflow_patterns = set() # Static methods @staticmethod def patterns(): """ Returns a list of single-state (dataflow) transformations currently in the registry. """ pattern_list = sorted(Transformation._patterns, key=lambda cls: cls.__name__) return pattern_list @staticmethod def stateflow_patterns(): """ Returns a list of multiple-state (interstate) transformations currently in the registry. """ pattern_list = sorted(Transformation._stateflow_patterns, key=lambda cls: cls.__name__) return pattern_list @staticmethod def register_pattern(clazz): """ Registers a single-state (dataflow) transformation in the registry. @param clazz: The Transformation class type. """ if not issubclass(clazz, Transformation): raise TypeError Transformation._patterns.add(clazz) @staticmethod def register_stateflow_pattern(clazz): """ Registers a multi-state transformation in the registry. @param clazz: The Transformation class type. """ if not issubclass(clazz, Transformation): raise TypeError Transformation._stateflow_patterns.add(clazz) @staticmethod def register_pattern_file(filename): """ Registers all transformations in a single Python file. """ pattern_members = {} with open(filename) as pattern_file: exec(pattern_file.read(), pattern_members) for member in pattern_members.values(): if inspect.isclass(member) and issubclass(member, Transformation): Transformation.register_pattern(member) @staticmethod def deregister_pattern(clazz): """ De-registers a transformation. @param clazz: The Transformation class type. """ if not issubclass(clazz, Transformation): raise TypeError Transformation._patterns.remove(clazz) #################################################################### # Static and object methods # Properties sdfg_id = Property(dtype=int, category="(Debug)") state_id = Property(dtype=int, category="(Debug)") subgraph = SubgraphProperty(dtype=dict, category="(Debug)") expr_index = Property(dtype=int, category="(Debug)") @staticmethod def annotates_memlets(): """ Indicates whether the transformation annotates the edges it creates or modifies with the appropriate memlets. This determines whether to apply memlet propagation after the transformation. """ return False @staticmethod def expressions(): """ Returns a list of Graph objects that will be matched in the subgraph isomorphism phase. Used as a pre-pass before calling `can_be_applied`. @see Transformation.can_be_applied """ raise NotImplementedError @staticmethod def can_be_applied(graph, candidate, expr_index, sdfg, strict=False): """ Returns True if this transformation can be applied on the candidate matched subgraph. @param graph: SDFGState object if this Transformation is single-state, or SDFG object otherwise. @param candidate: A mapping between node IDs returned from `Transformation.expressions` and the nodes in `graph`. @param expr_index: The list index from `Transformation.expressions` that was matched. @param sdfg: If `graph` is an SDFGState, its parent SDFG. Otherwise should be equal to `graph`. @return: True if the transformation can be applied. """ raise NotImplementedError @staticmethod def match_to_str(graph, candidate): """ Returns a string representation of the pattern match on the candidate subgraph. 
            Used when identifying matches in the console UI.
        """
        raise NotImplementedError

    def __init__(self, sdfg_id, state_id, subgraph, expr_index):
        """ Initializes an instance of Transformation.

            @param sdfg_id: A unique ID of the SDFG.
            @param state_id: The node ID of the SDFG state, if applicable.
            @param subgraph: A mapping between node IDs returned from
                             `Transformation.expressions` and the nodes in
                             `graph`.
            @param expr_index: The list index from
                               `Transformation.expressions` that was matched.
            @raise TypeError: When transformation is not a subclass of
                              Transformation.
            @raise TypeError: When state_id is not an instance of int.
            @raise TypeError: When subgraph is not a dict of
                              dace.graph.nodes.Node : int.
        """
        self.sdfg_id = sdfg_id
        self.state_id = state_id
        for value in subgraph.values():
            if not isinstance(value, int):
                raise TypeError('All values of the subgraph dictionary must '
                                'be instances of int.')
        self.subgraph = subgraph
        self.expr_index = expr_index

    def __lt__(self, other):
        """ Comparing two transformations by their class name and node IDs
            in match. Used for ordering transformations consistently.
        """
        if type(self) != type(other):
            return type(self).__name__ < type(other).__name__

        self_ids = iter(self.subgraph.values())
        other_ids = iter(other.subgraph.values())

        try:
            self_id = next(self_ids)
        except StopIteration:
            return True
        try:
            other_id = next(other_ids)
        except StopIteration:
            return False

        self_end = False

        while self_id is not None and other_id is not None:
            if self_id != other_id:
                return self_id < other_id
            try:
                self_id = next(self_ids)
            except StopIteration:
                self_end = True
            try:
                other_id = next(other_ids)
            except StopIteration:
                if self_end:  # Transformations are equal
                    return False
                return False
            if self_end:
                return True

    def apply_pattern(self, sdfg):
        """ Applies this transformation on the given SDFG. """
        self.apply(sdfg)
        if not self.annotates_memlets():
            labeling.propagate_labels_sdfg(sdfg)

    def __str__(self):
        raise NotImplementedError

    def print_match(self, sdfg):
        """ Returns a string representation of the pattern match on the
            given SDFG. Used for printing matches in the console UI.
        """
        if not isinstance(sdfg, dace.SDFG):
            raise TypeError("Expected SDFG, got: {}".format(
                type(sdfg).__name__))
        if self.state_id == -1:
            graph = sdfg
        else:
            graph = sdfg.nodes()[self.state_id]
        string = type(self).__name__ + ' in '
        string += type(self).match_to_str(graph, self.subgraph)
        return string

    @staticmethod
    def print_debuginfo():
        pass
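# Illustrative sketch of the registry API above (the `IdentifyTasklet` class
# is invented for this example, and the `nodes`/`sdutil` import paths are
# assumptions that differ between DaCe versions). A minimal single-state
# transformation matches one tasklet and registers itself as a dataflow
# pattern.
from dace.sdfg import nodes
from dace.sdfg import utils as sdutil

class IdentifyTasklet(Transformation):
    """ Toy transformation: matches any tasklet; applying it is a no-op. """
    _tasklet = nodes.Tasklet('_')

    @staticmethod
    def expressions():
        return [sdutil.node_path_graph(IdentifyTasklet._tasklet)]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return str(graph.nodes()[candidate[IdentifyTasklet._tasklet]])

    def apply(self, sdfg):
        pass  # A real transformation would modify the matched subgraph here.

Transformation.register_pattern(IdentifyTasklet)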
class GPUPersistentKernel(SubgraphTransformation):
    """
    This transformation takes a given subgraph of an SDFG and fuses the
    given states into a single persistent GPU kernel. Before this
    transformation can be applied, the SDFG needs to be transformed to run
    on the GPU (e.g., with the GPUTransformSDFG transformation).

    If applicable, the transformation removes the selected states from the
    original SDFG and places a `launch` state in their place. The removed
    states will be added to a nested SDFG in the launch state. If necessary,
    guard states will be added in the nested SDFG to ensure that global
    assignments on interstate edges are performed inside the kernel (this
    can be disabled with the `include_in_assignment` property).

    The given subgraph needs to fulfill the following properties to be
    fused:

     - All states in the selected subgraph need to fulfill the following:
        - access only GPU-accessible memory
        - all concurrent DFGs inside the state are either sequential or
          inside a GPU_Device map.
     - the selected subgraph has a single point of entry in the form of a
       single InterstateEdge entering the subgraph (i.e., there is at most
       one state, not part of the subgraph, from which the kernel is
       entered, and exactly one state inside the subgraph from which the
       kernel starts execution)
     - the selected subgraph has a single point of exit in the form of a
       single state that is entered after the selected subgraph is left
       (there can be multiple states from which the kernel can be left, but
       all will leave to the same state outside the subgraph)
    """

    validate = Property(
        desc="Validate the SDFG and the nested SDFG",
        dtype=bool,
        default=True,
    )

    include_in_assignment = Property(
        desc="Whether to include global variable assignments of the edge "
        "going into the kernel inside the kernel or have it happen on "
        "the outside. If the assignment is needed in the kernel, it "
        "needs to be included.",
        dtype=bool,
        default=True,
    )

    kernel_prefix = Property(
        desc="Name of the kernel. If no value is given, the kernel will be "
        "referenced as `kernel`; if a value is given, the kernel will be "
        "named `<kernel_prefix>_kernel`. This is useful if multiple "
        "kernels are created.",
        dtype=str,
        default='',
    )

    @staticmethod
    def can_be_applied(sdfg: SDFG, subgraph: SubgraphView):
        if not set(subgraph.nodes()).issubset(set(sdfg.nodes())):
            return False

        # All states need to be GPU states
        for state in subgraph:
            if not GPUPersistentKernel.is_gpu_state(sdfg, state):
                return False

        # For now, exactly one inner and one outer entry state is allowed
        entry_states_in, entry_states_out = \
            GPUPersistentKernel.get_entry_states(sdfg, subgraph)
        if len(entry_states_in) > 1 or len(entry_states_out) > 1:
            return False

        entry_state_in = entry_states_in.pop()
        if len(entry_states_out) == 1 \
                and len(sdfg.edges_between(entry_states_out.pop(),
                                           entry_state_in)) > 1:
            return False

        # For now, only one outside exit state is allowed; multiple inner
        # exit states are allowed
        _, exit_states_out = GPUPersistentKernel.get_exit_states(
            sdfg, subgraph)
        if len(exit_states_out) > 1:
            return False

        # Check that every subgraph state is reachable from the entry state
        front = [entry_state_in]
        reachable = {entry_state_in}

        while len(front) > 0:
            current = front.pop(0)
            unseen = [
                suc for suc in subgraph.successors(current)
                if suc not in reachable
            ]
            front += unseen
            reachable.update(unseen)

        if reachable != set(subgraph.nodes()):
            return False

        return True

    def apply(self, sdfg: SDFG):
        subgraph = self.subgraph_view(sdfg)

        if not self.can_be_applied(sdfg, subgraph):
            raise Exception('The given subgraph cannot be fused!')

        entry_states_in, entry_states_out = self.get_entry_states(
            sdfg, subgraph)
        _, exit_states_out = self.get_exit_states(sdfg, subgraph)

        entry_state_in = entry_states_in.pop()
        entry_state_out = entry_states_out.pop() \
            if len(entry_states_out) > 0 else None
        exit_state_out = exit_states_out.pop() \
            if len(exit_states_out) > 0 else None

        launch_state = None
        entry_guard_state = None
        exit_guard_state = None

        # Generate entry guard state if needed
        if self.include_in_assignment and entry_state_out is not None:
            entry_edge = sdfg.edges_between(entry_state_out,
                                            entry_state_in)[0]
            if len(entry_edge.data.assignments) > 0:
                entry_guard_state = sdfg.add_state(
                    label='{}kernel_entry_guard'.format(
                        self.kernel_prefix +
                        '_' if self.kernel_prefix != '' else ''))

                sdfg.add_edge(entry_state_out, entry_guard_state,
                              InterstateEdge(entry_edge.data.condition))
                sdfg.add_edge(
                    entry_guard_state, entry_state_in,
                    InterstateEdge(None, entry_edge.data.assignments))
                sdfg.remove_edge(entry_edge)

                # Update SubgraphView
                new_node_list = subgraph.nodes()
                new_node_list.append(entry_guard_state)
                subgraph = SubgraphView(sdfg, new_node_list)

                launch_state = sdfg.add_state_before(
                    entry_guard_state,
                    label='{}kernel_launch'.format(
                        self.kernel_prefix +
                        '_' if self.kernel_prefix != '' else ''))

        # Generate exit guard state
        if exit_state_out is not None:
            exit_guard_state = sdfg.add_state_before(
                exit_state_out,
                label='{}kernel_exit_guard'.format(
                    self.kernel_prefix +
                    '_' if self.kernel_prefix != '' else ''))

            # Update SubgraphView
            new_node_list = subgraph.nodes()
            new_node_list.append(exit_guard_state)
            subgraph = SubgraphView(sdfg, new_node_list)

            if launch_state is None:
                launch_state = sdfg.add_state_before(
                    exit_state_out,
                    label='{}kernel_launch'.format(
                        self.kernel_prefix +
                        '_' if self.kernel_prefix != '' else ''))

        # If the launch state doesn't exist at this point, then there are no
        # other states outside of the kernel, so create a standalone launch
        # state
        if launch_state is None:
            assert (entry_state_out is None and exit_state_out is None)
            launch_state = sdfg.add_state(label='{}kernel_launch'.format(
                self.kernel_prefix + '_' if self.kernel_prefix != '' else ''))
        # Create the kernel SDFG and fill it with the states and edges from
        # the subgraph; it will be nested in the launch state at the end
        kernel_sdfg = SDFG(
            '{}kernel'.format(self.kernel_prefix +
                              '_' if self.kernel_prefix != '' else ''))

        edges = subgraph.edges()
        for edge in edges:
            kernel_sdfg.add_edge(edge.src, edge.dst, edge.data)

        # Setting entry node in nested SDFG if no entry guard was created
        if entry_guard_state is None:
            kernel_sdfg.start_state = kernel_sdfg.node_id(entry_state_in)

        for state in subgraph:
            state.parent = kernel_sdfg

        # Remove the now-nested nodes from the outer SDFG and make sure the
        # launch state is properly connected to the remaining states
        sdfg.remove_nodes_from(subgraph.nodes())

        if entry_state_out is not None \
                and len(sdfg.edges_between(entry_state_out,
                                           launch_state)) == 0:
            sdfg.add_edge(entry_state_out, launch_state, InterstateEdge())

        if exit_state_out is not None \
                and len(sdfg.edges_between(launch_state,
                                           exit_state_out)) == 0:
            sdfg.add_edge(launch_state, exit_state_out, InterstateEdge())

        # Handle data for kernel
        kernel_data = set(node.data for state in kernel_sdfg
                          for node in state.nodes()
                          if isinstance(node, nodes.AccessNode))

        # Move Streams and Register data into the nested SDFG; normal data
        # will be added as kernel arguments
        kernel_args = []
        for data in kernel_data:
            if (isinstance(sdfg.arrays[data], dace.data.Stream) or
                (isinstance(sdfg.arrays[data], dace.data.Array)
                 and sdfg.arrays[data].storage == StorageType.Register)):
                kernel_sdfg.add_datadesc(data, sdfg.arrays[data])
                del sdfg.arrays[data]
            else:
                copy_desc = copy.deepcopy(sdfg.arrays[data])
                copy_desc.transient = False
                copy_desc.storage = StorageType.Default
                kernel_sdfg.add_datadesc(data, copy_desc)
                kernel_args.append(data)

        # Read-only data is passed as input; writable data is passed as
        # 'output', since otherwise the kernel could not write to it
        kernel_args_read = set()
        kernel_args_write = set()
        for data in kernel_args:
            data_accesses_read_only = [
                node.access == dtypes.AccessType.ReadOnly
                for state in kernel_sdfg for node in state
                if isinstance(node, nodes.AccessNode) and node.data == data
            ]
            if all(data_accesses_read_only):
                kernel_args_read.add(data)
            else:
                kernel_args_write.add(data)

        # Kernel SDFG is complete at this point
        if self.validate:
            kernel_sdfg.validate()

        # Fill launch state with nested SDFG, map and access nodes
        map_entry, map_exit = launch_state.add_map(
            '{}kernel_launch_map'.format(
                self.kernel_prefix + '_' if self.kernel_prefix != '' else ''),
            dict(ignore='0'),
            schedule=ScheduleType.GPU_Persistent,
        )

        nested_sdfg = launch_state.add_nested_sdfg(
            kernel_sdfg,
            sdfg,
            kernel_args_read,
            kernel_args_write,
        )

        # Create and connect read-only data access nodes
        for arg in kernel_args_read:
            read_node = launch_state.add_read(arg)
            launch_state.add_memlet_path(read_node,
                                         map_entry,
                                         nested_sdfg,
                                         dst_conn=arg,
                                         memlet=Memlet.from_array(
                                             arg, sdfg.arrays[arg]))

        # Create and connect writable data access nodes
        for arg in kernel_args_write:
            write_node = launch_state.add_write(arg)
            launch_state.add_memlet_path(nested_sdfg,
                                         map_exit,
                                         write_node,
                                         src_conn=arg,
                                         memlet=Memlet.from_array(
                                             arg, sdfg.arrays[arg]))

        # Transformation is done
        if self.validate:
            sdfg.validate()

    @staticmethod
    def is_gpu_state(sdfg: SDFG, state: SDFGState) -> bool:

        # Valid storage types
        gpu_accessible = [
            StorageType.GPU_Global,
            StorageType.GPU_Shared,
            StorageType.CPU_Pinned,
            StorageType.Register,
        ]

        for node in state.data_nodes():
            if type(node.desc(sdfg)) in [dace.data.Array, dace.data.Stream] \
                    and node.desc(sdfg).storage not in gpu_accessible:
                return False

        gpu_fused_schedules = [
            ScheduleType.Default,
ScheduleType.Sequential, ScheduleType.GPU_Device, ScheduleType.GPU_ThreadBlock, ScheduleType.GPU_ThreadBlock_Dynamic, ] for schedule in [ n.map.schedule for n in state.nodes() if isinstance(n, nodes.MapEntry) ]: if schedule not in gpu_fused_schedules: return False return True @staticmethod def get_entry_states(sdfg: SDFG, subgraph): entry_states_in = set() entry_states_out = set() for state in subgraph: inner_predecessors = set(subgraph.predecessors(state)) global_predecessors = set(sdfg.predecessors(state)) outer_predecessors = global_predecessors - inner_predecessors if len(outer_predecessors) > 0: entry_states_in.add(state) entry_states_out |= outer_predecessors return entry_states_in, entry_states_out @staticmethod def get_exit_states(sdfg: SDFG, subgraph): exit_states_in = set() exit_states_out = set() for state in subgraph: inner_successors = set(subgraph.successors(state)) global_successors = set(sdfg.successors(state)) outer_successors = global_successors - inner_successors if len(outer_successors) > 0: exit_states_in.add(state) exit_states_out |= outer_successors return exit_states_in, exit_states_out
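# Usage sketch (illustrative): fusing a set of already-GPU-transformed
# states into one persistent kernel. `kernel_states` is a hypothetical list
# of SDFGState objects satisfying the entry/exit conditions in the class
# docstring; the SubgraphView import path is an assumption.
def _example_persistent_kernel(sdfg, kernel_states):
    from dace.sdfg.graph import SubgraphView
    subgraph = SubgraphView(sdfg, kernel_states)
    if GPUPersistentKernel.can_be_applied(sdfg, subgraph):
        xform = GPUPersistentKernel(subgraph)
        xform.kernel_prefix = 'update'  # resulting kernel: 'update_kernel'
        xform.apply(sdfg)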
class SubgraphTransformation(object): """ Base class for transformations that apply on arbitrary subgraphs, rather than matching a specific pattern. Subclasses need to implement the `match` and `apply` operations. """ sdfg_id = Property(dtype=int, desc='ID of SDFG to transform') state_id = Property( dtype=int, desc='ID of state to transform subgraph within, or -1 to transform the ' 'SDFG') subgraph = SetProperty(element_type=int, desc='Subgraph in transformation instance') def __init__(self, subgraph: Union[Set[int], SubgraphView], sdfg_id: int = None, state_id: int = None): if (not isinstance(subgraph, (SubgraphView, SDFG, SDFGState)) and (sdfg_id is None or state_id is None)): raise TypeError( 'Subgraph transformation either expects a SubgraphView or a ' 'set of node IDs, SDFG ID and state ID (or -1).') # An entire graph is given as a subgraph if isinstance(subgraph, (SDFG, SDFGState)): subgraph = SubgraphView(subgraph, subgraph.nodes()) if isinstance(subgraph, SubgraphView): self.subgraph = set( subgraph.graph.node_id(n) for n in subgraph.nodes()) if isinstance(subgraph.graph, SDFGState): sdfg = subgraph.graph.parent self.sdfg_id = sdfg.sdfg_id self.state_id = sdfg.node_id(subgraph.graph) elif isinstance(subgraph.graph, SDFG): self.sdfg_id = subgraph.graph.sdfg_id self.state_id = -1 else: raise TypeError('Unrecognized graph type "%s"' % type(subgraph.graph).__name__) else: self.subgraph = subgraph self.sdfg_id = sdfg_id self.state_id = state_id def subgraph_view(self, sdfg: SDFG) -> SubgraphView: graph = sdfg.sdfg_list[self.sdfg_id] if self.state_id != -1: graph = graph.node(self.state_id) return SubgraphView(graph, [graph.node(idx) for idx in self.subgraph]) @staticmethod def match(sdfg: SDFG, subgraph: SubgraphView) -> bool: """ Tries to match the transformation on a given subgraph, returning True if this transformation can be applied. :param sdfg: The SDFG that includes the subgraph. :param subgraph: The SDFG or state subgraph to try to apply the transformation on. :return: True if the subgraph can be transformed, or False otherwise. """ pass def apply(self, sdfg: SDFG): """ Applies the transformation on the given subgraph. :param sdfg: The SDFG that includes the subgraph. """ pass def to_json(self, parent=None): props = dace.serialize.all_properties_to_json(self) return { 'type': 'SubgraphTransformation', 'transformation': type(self).__name__, **props } @staticmethod def from_json(json_obj, context=None): xform = next(ext for ext in SubgraphTransformation.extensions().keys() if ext.__name__ == json_obj['transformation']) # Reconstruct transformation ret = xform(json_obj['subgraph'], json_obj['sdfg_id'], json_obj['state_id']) context = context or {} context['transformation'] = ret dace.serialize.set_properties_from_json( ret, json_obj, context=context, ignore_properties={'transformation', 'type'}) return ret
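# Minimal subclass sketch (illustrative; `CountNodes` is invented for this
# example). It exercises only the interface defined above: `match` filters
# candidate subgraphs, and `apply` resolves the stored node IDs back into a
# view via `subgraph_view`.
class CountNodes(SubgraphTransformation):
    """ Toy subgraph transformation: matches any non-empty subgraph. """

    @staticmethod
    def match(sdfg: SDFG, subgraph: SubgraphView) -> bool:
        return len(subgraph.nodes()) > 0

    def apply(self, sdfg: SDFG):
        view = self.subgraph_view(sdfg)
        print('Subgraph has %d nodes' % len(view.nodes()))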
class Data(object): """ Data type descriptors that can be used as references to memory. Examples: Arrays, Streams, custom arrays (e.g., sparse matrices). """ dtype = TypeClassProperty() shape = ShapeProperty() transient = Property(dtype=bool) storage = Property(dtype=dace.types.StorageType, desc="Storage location", enum=dace.types.StorageType, default=dace.types.StorageType.Default, from_string=lambda x: types.StorageType[x]) location = Property( dtype=str, # Dict[str, symbolic] desc='Full storage location identifier (e.g., rank, GPU ID)', default='') toplevel = Property(dtype=bool, desc="Allocate array outside of state", default=False) debuginfo = DebugInfoProperty() def __init__(self, dtype, shape, transient, storage, location, toplevel, debuginfo): self.dtype = dtype self.shape = shape self.transient = transient self.storage = storage self.location = location self.toplevel = toplevel self.debuginfo = debuginfo self._validate() def validate(self): """ Validate the correctness of this object. Raises an exception on error. """ self._validate() # Validation of this class is in a separate function, so that this # class can call `_validate()` without calling the subclasses' # `validate` function. def _validate(self): if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol, symbolic.sympy.Basic)) for s in self.shape): raise TypeError('Shape must be a list or tuple of integer values ' 'or symbols') return True def copy(self): raise RuntimeError( 'Data descriptors are unique and should not be copied') def is_equivalent(self, other): """ Check for equivalence (shape and type) of two data descriptors. """ raise NotImplementedError def signature(self, with_types=True, for_call=False, name=None): """Returns a string for a C++ function signature (e.g., `int *A`). """ raise NotImplementedError def __repr__(self): return 'Abstract Data Container, DO NOT USE'
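# Brief sketch of the descriptor contract above, using the concrete
# subclasses defined elsewhere in this module (Scalar, Stream). Note that
# the API snapshots in this file differ slightly between versions, so the
# exact constructor arguments are assumptions; `copy` is forbidden by
# design, and descriptors are compared with `is_equivalent` instead.
def _example_data_contract():
    import dace
    a = Scalar(dace.float32, transient=True)
    b = Scalar(dace.float32)
    a.validate()               # shape [1] passes _validate()
    assert a.is_equivalent(b)  # equivalence checks dtype, not transience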
class GPUTransformLocalStorage(transformation.Transformation): """Implements the GPUTransformLocalStorage transformation. Similar to GPUTransformMap, but takes multiple maps leading from the same data node into account, creating a local storage for each range. @see: GPUTransformMap """ _arrays_removed = 0 _maps_transformed = 0 fullcopy = Property(desc="Copy whole arrays rather than used subset", dtype=bool, default=False) nested_seq = Property( desc="Makes nested code semantically-equivalent to single-core code," "transforming nested maps and memory into sequential and " "local memory respectively.", dtype=bool, default=True, ) _map_entry = nodes.MapEntry(nodes.Map("", [], [])) import dace.libraries.standard as stdlib # Avoid import loop _reduce = stdlib.Reduce("lambda: None", None) @staticmethod def expressions(): return [ sdutil.node_path_graph(GPUTransformLocalStorage._map_entry), sdutil.node_path_graph(GPUTransformLocalStorage._reduce), ] @staticmethod def can_be_applied(graph, candidate, expr_index, sdfg, strict=False): if expr_index == 0: map_entry = graph.nodes()[candidate[ GPUTransformLocalStorage._map_entry]] candidate_map = map_entry.map # Disallow GPUTransform on nested maps in strict mode if strict: if graph.entry_node(map_entry) is not None: return False # Map schedules that are disallowed to transform to GPUs if (candidate_map.schedule == dtypes.ScheduleType.MPI or candidate_map.schedule == dtypes.ScheduleType.GPU_Device or candidate_map.schedule == dtypes.ScheduleType.GPU_ThreadBlock or candidate_map.schedule == dtypes.ScheduleType.Sequential): return False # Dynamic map ranges cannot become kernels if sd.has_dynamic_map_inputs(graph, map_entry): return False # Recursively check parent for GPU schedules sdict = graph.scope_dict() current_node = map_entry while current_node is not None: if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device or current_node.map.schedule == dtypes.ScheduleType.GPU_ThreadBlock): return False current_node = sdict[current_node] # Ensure that map does not include internal arrays that are # allocated on non-default space subgraph = graph.scope_subgraph(map_entry) for node in subgraph.nodes(): if (isinstance(node, nodes.AccessNode) and node.desc(sdfg).storage != dtypes.StorageType.Default and node.desc(sdfg).storage != dtypes.StorageType.Register): return False # If one of the outputs is a stream, do not match map_exit = graph.exit_node(map_entry) for edge in graph.out_edges(map_exit): dst = graph.memlet_path(edge)[-1].dst if (isinstance(dst, nodes.AccessNode) and isinstance(sdfg.arrays[dst.data], data.Stream)): return False return True elif expr_index == 1: reduce = graph.nodes()[candidate[GPUTransformLocalStorage._reduce]] # Recursively check parent for GPU schedules sdict = graph.scope_dict() current_node = sdict[reduce] while current_node is not None: if (current_node.map.schedule == dtypes.ScheduleType.GPU_Device or current_node.map.schedule == dtypes.ScheduleType.GPU_ThreadBlock): return False current_node = sdict[current_node] return True @staticmethod def match_to_str(graph, candidate): if GPUTransformLocalStorage._reduce in candidate: return str( graph.nodes()[candidate[GPUTransformLocalStorage._reduce]]) else: map_entry = graph.nodes()[candidate[ GPUTransformLocalStorage._map_entry]] return str(map_entry) def apply(self, sdfg): graph = sdfg.nodes()[self.state_id] if self.expr_index == 0: cnode: nodes.MapEntry = graph.nodes()[self.subgraph[ GPUTransformLocalStorage._map_entry]] # Change schedule cnode.schedule = 
dtypes.ScheduleType.GPU_Device exit_node = graph.exit_node(cnode) else: cnode: nodes.LibraryNode = graph.nodes()[self.subgraph[ GPUTransformLocalStorage._reduce]] # Change schedule cnode.schedule = dtypes.ScheduleType.GPU_Default exit_node = cnode if Config.get_bool("debugprint"): GPUTransformLocalStorage._maps_transformed += 1 # If nested graph is designated as sequential, transform schedules and # storage from Default to Sequential/Register if self.nested_seq and self.expr_index == 0: for node in graph.scope_subgraph(cnode).nodes(): if isinstance(node, nodes.AccessNode): arr = node.desc(sdfg) if arr.storage == dtypes.StorageType.Default: arr.storage = dtypes.StorageType.Register elif isinstance(node, nodes.MapEntry): if node.map.schedule == dtypes.ScheduleType.Default: node.map.schedule = dtypes.ScheduleType.Sequential gpu_storage_types = [ dtypes.StorageType.GPU_Global, dtypes.StorageType.GPU_Shared, ] ####################################################### # Add GPU copies of CPU arrays (i.e., not already on GPU) # First, understand which arrays to clone all_out_edges = [] all_out_edges.extend(list(graph.out_edges(exit_node))) in_arrays_to_clone = set() out_arrays_to_clone = set() for e in graph.in_edges(cnode): data_node = sd.find_input_arraynode(graph, e) if data_node.desc(sdfg).storage not in gpu_storage_types: in_arrays_to_clone.add((data_node, e.data)) for e in all_out_edges: data_node = sd.find_output_arraynode(graph, e) if data_node.desc(sdfg).storage not in gpu_storage_types: out_arrays_to_clone.add((data_node, e.data)) if Config.get_bool("debugprint"): GPUTransformLocalStorage._arrays_removed += len( in_arrays_to_clone) + len(out_arrays_to_clone) # Second, create a GPU clone of each array # TODO: Overapproximate union of memlets cloned_arrays = {} in_cloned_arraynodes = {} out_cloned_arraynodes = {} for array_node, memlet in in_arrays_to_clone: array = array_node.desc(sdfg) cloned_name = "gpu_" + array_node.data for i, r in enumerate(memlet.bounding_box_size()): size = symbolic.overapproximate(r) try: if int(size) == 1: suffix = [] for c in str(memlet.subset[i][0]): if c.isalpha() or c.isdigit() or c == "_": suffix.append(c) elif c == "+": suffix.append("p") elif c == "-": suffix.append("m") elif c == "*": suffix.append("t") elif c == "/": suffix.append("d") cloned_name += "_" + "".join(suffix) except: continue if cloned_name in sdfg.arrays.keys(): cloned_array = sdfg.arrays[cloned_name] elif array_node.data in cloned_arrays: cloned_array = cloned_arrays[array_node.data] else: full_shape = [] for r in memlet.bounding_box_size(): size = symbolic.overapproximate(r) try: full_shape.append(int(size)) except: full_shape.append(size) actual_dims = [ idx for idx, r in enumerate(full_shape) if not (isinstance(r, int) and r == 1) ] if len(actual_dims) == 0: # abort actual_dims = [len(full_shape) - 1] if isinstance(array, data.Scalar): sdfg.add_array(name=cloned_name, shape=[1], dtype=array.dtype, transient=True, storage=dtypes.StorageType.GPU_Global) elif isinstance(array, data.Stream): sdfg.add_stream( name=cloned_name, dtype=array.dtype, shape=[full_shape[d] for d in actual_dims], veclen=array.veclen, buffer_size=array.buffer_size, storage=dtypes.StorageType.GPU_Global, transient=True, offset=[array.offset[d] for d in actual_dims]) else: sdfg.add_array( name=cloned_name, shape=[full_shape[d] for d in actual_dims], dtype=array.dtype, transient=True, storage=dtypes.StorageType.GPU_Global, allow_conflicts=array.allow_conflicts, strides=[array.strides[d] for d in actual_dims], 
offset=[array.offset[d] for d in actual_dims], ) cloned_arrays[array_node.data] = cloned_name cloned_node = type(array_node)(cloned_name) in_cloned_arraynodes[array_node.data] = cloned_node for array_node, memlet in out_arrays_to_clone: array = array_node.desc(sdfg) cloned_name = "gpu_" + array_node.data for i, r in enumerate(memlet.bounding_box_size()): size = symbolic.overapproximate(r) try: if int(size) == 1: suffix = [] for c in str(memlet.subset[i][0]): if c.isalpha() or c.isdigit() or c == "_": suffix.append(c) elif c == "+": suffix.append("p") elif c == "-": suffix.append("m") elif c == "*": suffix.append("t") elif c == "/": suffix.append("d") cloned_name += "_" + "".join(suffix) except: continue if cloned_name in sdfg.arrays.keys(): cloned_array = sdfg.arrays[cloned_name] elif array_node.data in cloned_arrays: cloned_array = cloned_arrays[array_node.data] else: full_shape = [] for r in memlet.bounding_box_size(): size = symbolic.overapproximate(r) try: full_shape.append(int(size)) except: full_shape.append(size) actual_dims = [ idx for idx, r in enumerate(full_shape) if not (isinstance(r, int) and r == 1) ] if len(actual_dims) == 0: # abort actual_dims = [len(full_shape) - 1] if isinstance(array, data.Scalar): sdfg.add_array(name=cloned_name, shape=[1], dtype=array.dtype, transient=True, storage=dtypes.StorageType.GPU_Global) elif isinstance(array, data.Stream): sdfg.add_stream( name=cloned_name, dtype=array.dtype, shape=[full_shape[d] for d in actual_dims], veclen=array.veclen, buffer_size=array.buffer_size, storage=dtypes.StorageType.GPU_Global, transient=True, offset=[array.offset[d] for d in actual_dims]) else: sdfg.add_array( name=cloned_name, shape=[full_shape[d] for d in actual_dims], dtype=array.dtype, transient=True, storage=dtypes.StorageType.GPU_Global, allow_conflicts=array.allow_conflicts, strides=[array.strides[d] for d in actual_dims], offset=[array.offset[d] for d in actual_dims], ) cloned_arrays[array_node.data] = cloned_name cloned_node = type(array_node)(cloned_name) cloned_node.setzero = True out_cloned_arraynodes[array_node.data] = cloned_node # Third, connect the cloned arrays to the originals for array_name, node in in_cloned_arraynodes.items(): graph.add_node(node) is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar) for edge in graph.in_edges(cnode): if edge.data.data == array_name: newmemlet = copy.deepcopy(edge.data) newmemlet.data = node.data if is_scalar: newmemlet.subset = sbs.Indices([0]) else: offset = [] lost_dims = [] lost_ranges = [] newsubset = [None] * len(edge.data.subset) for ind, r in enumerate(edge.data.subset): offset.append(r[0]) if isinstance(edge.data.subset[ind], tuple): begin = edge.data.subset[ind][0] - r[0] end = edge.data.subset[ind][1] - r[0] step = edge.data.subset[ind][2] if begin == end: lost_dims.append(ind) lost_ranges.append((begin, end, step)) else: newsubset[ind] = (begin, end, step) else: newsubset[ind] -= r[0] if len(lost_dims) == len(edge.data.subset): lost_dims.pop() newmemlet.subset = type( edge.data.subset)([lost_ranges[-1]]) else: newmemlet.subset = type(edge.data.subset)( [r for r in newsubset if r is not None]) graph.add_edge(node, None, edge.dst, edge.dst_conn, newmemlet) for e in graph.bfs_edges(edge.dst, reverse=False): parent, _, _child, _, memlet = e if parent != edge.dst and not in_scope( graph, parent, edge.dst): break if memlet.data != edge.data.data: continue path = graph.memlet_path(e) if not isinstance(path[-1].dst, nodes.CodeNode): if in_path(path, e, nodes.ExitNode, forward=True): if 
isinstance(parent, nodes.CodeNode): # Output edge break else: continue if is_scalar: memlet.subset = sbs.Indices([0]) else: newsubset = [None] * len(memlet.subset) for ind, r in enumerate(memlet.subset): if ind in lost_dims: continue if isinstance(memlet.subset[ind], tuple): begin = r[0] - offset[ind] end = r[1] - offset[ind] step = r[2] newsubset[ind] = (begin, end, step) else: newsubset[ind] = ( r - offset[ind], r - offset[ind], 1, ) memlet.subset = type(edge.data.subset)( [r for r in newsubset if r is not None]) memlet.data = node.data if self.fullcopy: edge.data.subset = sbs.Range.from_array( node.desc(sdfg)) edge.data.other_subset = newmemlet.subset graph.add_edge(edge.src, edge.src_conn, node, None, edge.data) graph.remove_edge(edge) for array_name, node in out_cloned_arraynodes.items(): graph.add_node(node) is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar) for edge in all_out_edges: if edge.data.data == array_name: newmemlet = copy.deepcopy(edge.data) newmemlet.data = node.data if is_scalar: newmemlet.subset = sbs.Indices([0]) else: offset = [] lost_dims = [] lost_ranges = [] newsubset = [None] * len(edge.data.subset) for ind, r in enumerate(edge.data.subset): offset.append(r[0]) if isinstance(edge.data.subset[ind], tuple): begin = edge.data.subset[ind][0] - r[0] end = edge.data.subset[ind][1] - r[0] step = edge.data.subset[ind][2] if begin == end: lost_dims.append(ind) lost_ranges.append((begin, end, step)) else: newsubset[ind] = (begin, end, step) else: newsubset[ind] -= r[0] if len(lost_dims) == len(edge.data.subset): lost_dims.pop() newmemlet.subset = type( edge.data.subset)([lost_ranges[-1]]) else: newmemlet.subset = type(edge.data.subset)( [r for r in newsubset if r is not None]) graph.add_edge(edge.src, edge.src_conn, node, None, newmemlet) end_node = graph.entry_node(edge.src) for e in graph.bfs_edges(edge.src, reverse=True): parent, _, _child, _, memlet = e if parent == end_node: break if memlet.data != edge.data.data: continue path = graph.memlet_path(e) if not isinstance(path[0].dst, nodes.CodeNode): if in_path(path, e, nodes.EntryNode, forward=False): if isinstance(parent, nodes.CodeNode): # Output edge break else: continue if is_scalar: memlet.subset = sbs.Indices([0]) else: newsubset = [None] * len(memlet.subset) for ind, r in enumerate(memlet.subset): if ind in lost_dims: continue if isinstance(memlet.subset[ind], tuple): begin = r[0] - offset[ind] end = r[1] - offset[ind] step = r[2] newsubset[ind] = (begin, end, step) else: newsubset[ind] = ( r - offset[ind], r - offset[ind], 1, ) memlet.subset = type(edge.data.subset)( [r for r in newsubset if r is not None]) memlet.data = node.data edge.data.wcr = None if self.fullcopy: edge.data.subset = sbs.Range.from_array( node.desc(sdfg)) edge.data.other_subset = newmemlet.subset graph.add_edge(node, None, edge.dst, edge.dst_conn, edge.data) graph.remove_edge(edge) # Fourth, replace memlet arrays as necessary if self.expr_index == 0: scope_subgraph = graph.scope_subgraph(cnode) for edge in scope_subgraph.edges(): if edge.data.data is not None and edge.data.data in cloned_arrays: edge.data.data = cloned_arrays[edge.data.data]
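# Usage sketch (illustrative): like other pattern-based transformations,
# GPUTransformLocalStorage is normally applied through the matching API
# rather than constructed by hand. The import path and the `options`
# keyword are assumptions for this DaCe version.
def _example_gpu_local_storage(sdfg):
    from dace.transformation.dataflow import GPUTransformLocalStorage
    sdfg.apply_transformations(GPUTransformLocalStorage,
                               options={'fullcopy': True})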
class Stream(Data): """ Stream (or stream array) data descriptor. """ # Properties strides = Property(dtype=list) offset = Property(dtype=list) buffer_size = Property(dtype=int, desc="Size of internal buffer.") veclen = Property(dtype=int, desc="Vector length. Memlets must adhere to this.") def __init__(self, dtype, veclen, buffer_size, shape=None, transient=False, storage=dace.types.StorageType.Default, location='', strides=None, offset=None, toplevel=False, debuginfo=None): if shape is None: shape = (1, ) self.veclen = veclen self.buffer_size = buffer_size if strides is not None: if len(strides) != len(shape): raise TypeError('Strides must be the same size as shape') self.strides = cp.copy(strides) else: self.strides = cp.copy(list(shape)) if offset is not None: if len(offset) != len(shape): raise TypeError('Offset must be the same size as shape') self.offset = cp.copy(offset) else: self.offset = [0] * len(shape) super(Stream, self).__init__(dtype, shape, transient, storage, location, toplevel, debuginfo) def __repr__(self): return 'Stream (dtype=%s, shape=%s)' % (self.dtype, self.shape) def clone(self): return Stream(self.dtype, self.veclen, self.buffer_size, self.shape, self.transient, self.storage, self.location, self.strides, self.offset, self.toplevel, self.debuginfo) # Checks for equivalent shape and type def is_equivalent(self, other): if not isinstance(other, Stream): return False # Test type if self.dtype != other.dtype: return False # Test dimensionality if len(self.shape) != len(other.shape): return False # Test shape for dim, otherdim in zip(self.shape, other.shape): # If both are symbols, ensure equality if symbolic.issymbolic(dim) and symbolic.issymbolic(otherdim): if dim != otherdim: return False # If one is a symbol and the other is a constant # make sure they are equivalent elif symbolic.issymbolic(otherdim): if symbolic.eval(otherdim) != dim: return False elif symbolic.issymbolic(dim): if symbolic.eval(dim) != otherdim: return False else: # Any other case (constant vs. constant), check for equality if otherdim != dim: return False return True def signature(self, with_types=True, for_call=False, name=None): if not with_types or for_call: return name if self.storage in [ dace.types.StorageType.GPU_Global, dace.types.StorageType.GPU_Shared, dace.types.StorageType.GPU_Stack ]: return 'dace::GPUStream<%s, %s> %s' % (str( self.dtype.ctype), 'true' if sp.log( self.buffer_size, 2).is_Integer else 'false', name) return 'dace::Stream<%s> %s' % (str(self.dtype.ctype), name) def sizes(self): return [ d.name if isinstance(d, symbolic.symbol) else str(d) for d in self.shape ] def size_string(self): return (" * ".join([ cppunparse.pyexpr2cpp(dace.symbolic.symstr(s)) for s in self.strides ])) def is_stream_array(self): return functools.reduce(lambda a, b: a * b, self.strides) != 1 def covers_range(self, rng): if len(rng) != len(self.shape): return False for s, (rb, re, rs) in zip(self.shape, rng): # Shape has to be positive if isinstance(s, sympy.Basic): olds = s if 'positive' in s.assumptions0: s = sympy.Symbol(str(s), **s.assumptions0) else: s = sympy.Symbol(str(s), positive=True, **s.assumptions0) if isinstance(rb, sympy.Basic): rb = rb.subs({olds: s}) if isinstance(re, sympy.Basic): re = re.subs({olds: s}) if isinstance(rs, sympy.Basic): rs = rs.subs({olds: s}) try: if rb < 0: # Negative offset return False except TypeError: # cannot determine truth value of Relational pass #print('WARNING: Cannot evaluate relational expression %s, assuming true.' 
            #       % (rb >= 0), 'If this expression is false, please refine
            #       symbol definitions in the program.')
            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:  # cannot determine truth value of Relational
                pass
                # print('WARNING: Cannot evaluate relational expression %s, '
                #       'assuming true.' % (re <= s), 'If this expression is '
                #       'false, please refine symbol definitions in the '
                #       'program.')
        return True
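# Illustrative construction of the descriptor above: a stream array of four
# FIFO channels with a 32-element internal buffer. Argument order follows
# the __init__ signature defined here (dtype, veclen, buffer_size, shape).
def _example_stream_descriptor():
    import dace
    s = Stream(dace.float32, 1, 32, shape=(4, ), transient=True)
    assert s.is_stream_array()        # product of strides is 4, not 1
    print(s.signature(name='queue'))  # -> 'dace::Stream<float> queue'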
class MapReduceFusion(pm.Transformation): """ Implements the map-reduce-fusion transformation. Fuses a map with an immediately following reduction, where the array between the map and the reduction is not used anywhere else. """ no_init = Property( dtype=bool, default=False, desc='If enabled, does not create initialization states ' 'for reduce nodes with identity') _tasklet = nodes.Tasklet('_') _tmap_exit = nodes.MapExit(nodes.Map("", [], [])) _in_array = nodes.AccessNode('_') import dace.libraries.standard as stdlib # Avoid import loop _reduce = stdlib.Reduce() _out_array = nodes.AccessNode('_') @staticmethod def expressions(): return [ sdutil.node_path_graph(MapReduceFusion._tasklet, MapReduceFusion._tmap_exit, MapReduceFusion._in_array, MapReduceFusion._reduce, MapReduceFusion._out_array) ] @staticmethod def can_be_applied(graph, candidate, expr_index, sdfg, strict=False): tmap_exit = graph.nodes()[candidate[MapReduceFusion._tmap_exit]] in_array = graph.nodes()[candidate[MapReduceFusion._in_array]] reduce_node = graph.nodes()[candidate[MapReduceFusion._reduce]] tasklet = graph.nodes()[candidate[MapReduceFusion._tasklet]] # Make sure that the array is only accessed by the map and the reduce if any([ src != tmap_exit for src, _, _, _, memlet in graph.in_edges(in_array) ]): return False if any([ dest != reduce_node for _, _, dest, _, memlet in graph.out_edges(in_array) ]): return False tmem = next(e for e in graph.edges_between(tasklet, tmap_exit) if e.data.data == in_array.data).data # (strict) Make sure that the transient is not accessed anywhere else # in this state or other states if strict and (len([ n for n in graph.nodes() if isinstance(n, nodes.AccessNode) and n.data == in_array.data ]) > 1 or in_array.data in sdfg.shared_transients()): return False # If memlet already has WCR and it is different from reduce node, # do not match if tmem.wcr is not None and tmem.wcr != reduce_node.wcr: return False # Verify that reduction ranges match tasklet map tout_memlet = graph.in_edges(in_array)[0].data rin_memlet = graph.out_edges(in_array)[0].data if tout_memlet.subset != rin_memlet.subset: return False return True @staticmethod def match_to_str(graph, candidate): tasklet = candidate[MapReduceFusion._tasklet] map_exit = candidate[MapReduceFusion._tmap_exit] reduce = candidate[MapReduceFusion._reduce] return ' -> '.join(str(node) for node in [tasklet, map_exit, reduce]) def apply(self, sdfg: SDFG): graph = sdfg.nodes()[self.state_id] tmap_exit = graph.nodes()[self.subgraph[MapReduceFusion._tmap_exit]] in_array = graph.nodes()[self.subgraph[MapReduceFusion._in_array]] reduce_node = graph.nodes()[self.subgraph[MapReduceFusion._reduce]] out_array = graph.nodes()[self.subgraph[MapReduceFusion._out_array]] # Set nodes to remove according to the expression index nodes_to_remove = [in_array] nodes_to_remove.append(reduce_node) memlet_edge = None for edge in graph.in_edges(tmap_exit): if edge.data.data == in_array.data: memlet_edge = edge break if memlet_edge is None: raise RuntimeError('Reduction memlet cannot be None') # Find which indices should be removed from new memlet input_edge = graph.in_edges(reduce_node)[0] axes = reduce_node.axes or list(range(len(input_edge.data.subset))) array_edge = graph.out_edges(reduce_node)[0] # Delete relevant edges and nodes graph.remove_nodes_from(nodes_to_remove) # Filter out reduced dimensions from subset filtered_subset = [ dim for i, dim in enumerate(memlet_edge.data.subset) if i not in axes ] if len(filtered_subset) == 0: # Output is a scalar 
filtered_subset = [(0, 0, 1)] # Modify edge from tasklet to map exit memlet_edge.data.data = out_array.data memlet_edge.data.wcr = reduce_node.wcr memlet_edge.data.subset = type( memlet_edge.data.subset)(filtered_subset) # Add edge from map exit to output array graph.add_edge( memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:], array_edge.dst, array_edge.dst_conn, Memlet(array_edge.data.data, array_edge.data.num_accesses, array_edge.data.subset, array_edge.data.veclen, reduce_node.wcr)) # Add initialization state as necessary if reduce_node.identity is not None: init_state = sdfg.add_state_before(graph) init_state.add_mapped_tasklet( 'freduce_init', [('o%d' % i, '%s:%s:%s' % (r[0], r[1] + 1, r[2])) for i, r in enumerate(array_edge.data.subset)], {}, 'out = %s' % reduce_node.identity, { 'out': Memlet.simple( array_edge.data.data, ','.join([ 'o%d' % i for i in range(len(array_edge.data.subset)) ])) }, external_edges=True)
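# Usage sketch (illustrative): the fusion above is typically applied
# repeatedly until no map-reduce pairs remain. The import path and the
# `apply_transformations_repeated` API are assumptions for this DaCe
# version.
def _example_map_reduce_fusion(sdfg):
    from dace.transformation.dataflow import MapReduceFusion
    sdfg.apply_transformations_repeated(MapReduceFusion)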
class Memlet(object): """ Data movement object. Represents the data, the subset moved, and the manner it is reindexed (`other_subset`) into the destination. If there are multiple conflicting writes, this object also specifies how they are resolved with a lambda function. """ # Properties veclen = Property(dtype=int, desc="Vector length") num_accesses = SymbolicProperty(default=0) subset = SubsetProperty(default=subsets.Range([])) other_subset = SubsetProperty(allow_none=True) data = DataProperty() debuginfo = DebugInfoProperty() wcr = LambdaProperty(allow_none=True) wcr_identity = Property(dtype=object, default=None, allow_none=True) wcr_conflict = Property(dtype=bool, default=True) allow_oob = Property(dtype=bool, default=False, desc='Bypass out-of-bounds validation') def __init__(self, data, num_accesses, subset, vector_length, wcr=None, wcr_identity=None, other_subset=None, debuginfo=None, wcr_conflict=True): """ Constructs a Memlet. :param data: The data object or name to access. B{Note:} this parameter will soon be deprecated. @type data: Either a string of the data descriptor name or an AccessNode. :param num_accesses: The number of times that the moved data will be subsequently accessed. If `dace.dtypes.DYNAMIC` (-1), designates that the number of accesses is unknown at compile time. :param subset: The subset of `data` that is going to be accessed. :param vector_length: The length of a single unit of access to the data (used for vectorization optimizations). :param wcr: A lambda function specifying how write-conflicts are resolved. The syntax of the lambda function receives two elements: `current` value and `new` value, and returns the value after resolution. For example, summation is `lambda cur, new: cur + new`. :param wcr_identity: Identity value used for the first write conflict. B{Note:} this parameter will soon be deprecated. :param other_subset: The reindexing of `subset` on the other connected data. :param debuginfo: Source-code information (e.g., line, file) used for debugging. :param wcr_conflict: If False, forces non-locked conflict resolution when generating code. The default is to let the code generator infer this information from the SDFG. """ # Properties self.num_accesses = num_accesses # type: sympy.expr.Expr self.subset = subset # type: subsets.Subset self.veclen = vector_length # type: int if hasattr(data, 'data'): data = data.data self.data = data # type: str # Annotates memlet with _how_ writing is performed in case of conflict self.wcr = wcr self.wcr_identity = wcr_identity self.wcr_conflict = wcr_conflict # The subset of the other endpoint we are copying from/to (note: # carries the dimensionality of the other endpoint too!) self.other_subset = other_subset self.debuginfo = debuginfo def to_json(self, parent_graph=None): attrs = dace.serialize.all_properties_to_json(self) retdict = {"type": "Memlet", "label": str(self), "attributes": attrs} return retdict @staticmethod def from_json(json_obj, context=None): if json_obj['type'] != "Memlet": raise TypeError("Invalid data type") # Create dummy object ret = Memlet("", dace.dtypes.DYNAMIC, None, 1) dace.serialize.set_properties_from_json(ret, json_obj, context=context) return ret @staticmethod def simple(data, subset_str, veclen=1, wcr_str=None, wcr_identity=None, other_subset_str=None, wcr_conflict=True, num_accesses=None, debuginfo=None): """ Constructs a Memlet from string-based expressions. :param data: The data object or name to access. B{Note:} this parameter will soon be deprecated. 
@type data: Either a string of the data descriptor name or an AccessNode. :param subset_str: The subset of `data` that is going to be accessed in string format. Example: '0:N'. :param veclen: The length of a single unit of access to the data (used for vectorization optimizations). :param wcr_str: A lambda function (as a string) specifying how write-conflicts are resolved. The syntax of the lambda function receives two elements: `current` value and `new` value, and returns the value after resolution. For example, summation is `'lambda cur, new: cur + new'`. :param wcr_identity: Identity value used for the first write conflict. B{Note:} this parameter will soon be deprecated. :param other_subset_str: The reindexing of `subset` on the other connected data (as a string). :param wcr_conflict: If False, forces non-locked conflict resolution when generating code. The default is to let the code generator infer this information from the SDFG. :param num_accesses: The number of times that the moved data will be subsequently accessed. If `dace.dtypes.DYNAMIC` (-1), designates that the number of accesses is unknown at compile time. :param debuginfo: Source-code information (e.g., line, file) used for debugging. """ subset = SubsetProperty.from_string(subset_str) if num_accesses is not None: na = num_accesses else: na = subset.num_elements() if wcr_str is not None: wcr = LambdaProperty.from_string(wcr_str) else: wcr = None if other_subset_str is not None: other_subset = SubsetProperty.from_string(other_subset_str) else: other_subset = None # If it is an access node or another memlet if hasattr(data, 'data'): data = data.data return Memlet(data, na, subset, veclen, wcr=wcr, wcr_identity=wcr_identity, other_subset=other_subset, wcr_conflict=wcr_conflict, debuginfo=debuginfo) @staticmethod def from_array(dataname, datadesc): """ Constructs a Memlet that transfers an entire array's contents. :param dataname: The name of the data descriptor in the SDFG. :param datadesc: The data descriptor object. @type datadesc: Data. """ range = subsets.Range.from_array(datadesc) return Memlet(dataname, range.num_elements(), range, 1) def __hash__(self): return hash((self.data, self.num_accesses, self.subset, self.veclen, str(self.wcr), self.wcr_identity, self.other_subset)) def __eq__(self, other): return all([ self.data == other.data, self.num_accesses == other.num_accesses, self.subset == other.subset, self.veclen == other.veclen, self.wcr == other.wcr, self.wcr_identity == other.wcr_identity, self.other_subset == other.other_subset ]) def num_elements(self): """ Returns the number of elements in the Memlet subset. """ return self.subset.num_elements() def bounding_box_size(self): """ Returns a per-dimension upper bound on the maximum number of elements in each dimension. This bound will be tight in the case of Range. """ return self.subset.bounding_box_size() def validate(self, sdfg, state): if self.data is not None and self.data not in sdfg.arrays: raise KeyError('Array "%s" not found in SDFG' % self.data) def __label__(self, sdfg, state): """ Returns a string representation of the memlet for display in a graph. :param sdfg: The SDFG in which the memlet resides. :param state: An SDFGState object in which the memlet resides. 
""" if self.data is None: return self._label(None) return self._label(sdfg.arrays[self.data].shape) def __str__(self): return self._label(None) def _label(self, shape): result = '' if self.data is not None: result = self.data if self.subset is None: return result num_elements = self.subset.num_elements() if self.num_accesses != num_elements: if self.num_accesses == -1: result += '(dyn) ' else: result += '(%s) ' % SymbolicProperty.to_string( self.num_accesses) arrayNotation = True try: if shape is not None and reduce(operator.mul, shape, 1) == 1: # Don't draw array if we're accessing a single element and it's zero if all(s == 0 for s in self.subset.min_element()): arrayNotation = False except TypeError: # Will fail if trying to check the truth value of a sympy expr pass if arrayNotation: result += '[%s]' % str(self.subset) if self.wcr is not None and str(self.wcr) != '': # Autodetect reduction type redtype = detect_reduction_type(self.wcr) if redtype == dtypes.ReductionType.Custom: wcrstr = unparse(ast.parse(self.wcr).body[0].value.body) else: wcrstr = str(redtype) wcrstr = wcrstr[wcrstr.find('.') + 1:] # Skip "ReductionType." result += ' (CR: %s' % wcrstr if self.wcr_identity is not None: result += ', id: %s' % str(self.wcr_identity) result += ')' if self.other_subset is not None: result += ' -> [%s]' % str(self.other_subset) return result def __repr__(self): return "Memlet (" + self.__str__() + ")"
class Scalar(Data):
    """ Data descriptor of a scalar value. """

    allow_conflicts = Property(dtype=bool, default=False)

    def __init__(self,
                 dtype,
                 transient=False,
                 storage=dtypes.StorageType.Default,
                 allow_conflicts=False,
                 location=None,
                 lifetime=dtypes.AllocationLifetime.Scope,
                 debuginfo=None):
        self.allow_conflicts = allow_conflicts
        shape = [1]
        super(Scalar, self).__init__(dtype, shape, transient, storage,
                                     location, lifetime, debuginfo)

    @staticmethod
    def from_json(json_obj, context=None):
        if json_obj['type'] != "Scalar":
            raise TypeError("Invalid data type")

        # Create dummy object
        ret = Scalar(dtypes.int8)
        serialize.set_properties_from_json(ret, json_obj, context=context)

        # Check validity now
        ret.validate()
        return ret

    def __repr__(self):
        return 'Scalar (dtype=%s)' % self.dtype

    def clone(self):
        return Scalar(self.dtype, self.transient, self.storage,
                      self.allow_conflicts, self.location, self.lifetime,
                      self.debuginfo)

    @property
    def strides(self):
        return [1]

    @property
    def total_size(self):
        return 1

    @property
    def offset(self):
        return [0]

    def is_equivalent(self, other):
        if not isinstance(other, Scalar):
            return False
        if self.dtype != other.dtype:
            return False
        return True

    def as_arg(self, with_types=True, for_call=False, name=None):
        if not with_types or for_call:
            return name
        return self.dtype.as_arg(name)

    def sizes(self):
        return None

    def covers_range(self, rng):
        if len(rng) != 1:
            return False

        rng = rng[0]

        try:
            if (rng[1] - rng[0]) > rng[2]:
                return False
        except TypeError:  # cannot determine truth value of Relational
            pass
            # print('WARNING: Cannot evaluate relational expression %s, '
            #       'assuming true.' % ((rng[1] - rng[0]) <= rng[2]),
            #       'If this expression is false, please refine symbol '
            #       'definitions in the program.')

        return True
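# Quick check of the Scalar contract above (illustrative): a scalar always
# has unit size and covers only single-element, one-dimensional ranges.
def _example_scalar_descriptor():
    import dace
    s = Scalar(dace.int32)
    assert s.total_size == 1 and s.strides == [1]
    assert s.covers_range([(0, 0, 1)])          # single element: covered
    assert not s.covers_range([(0, 0, 1)] * 2)  # wrong dimensionality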
class Transformation(TransformationBase):
    """ Base class for pattern-matching transformations, as well as a static
        registry of transformations, where new transformations can be added
        in a decentralized manner.
        An instance of a Transformation represents a match of the
        transformation on an SDFG, complete with a subgraph candidate and
        properties.

        New transformations that extend this class must contain static
        `PatternNode` fields that represent the nodes in the pattern graph,
        and use them to implement at least three methods:
          * `expressions`: A method that returns a list of graph patterns
                           (SDFG or SDFGState objects) that match this
                           transformation.
          * `can_be_applied`: A method that, given a subgraph candidate,
                              checks for additional conditions whether it
                              can be transformed.
          * `apply`: A method that applies the transformation on the given
                     SDFG.

        For more information and optimization opportunities, see the
        respective methods' documentation.

        In order to be included in lists and apply through the
        `sdfg.apply_transformations` API, each transformation should be
        registered with ``Transformation.register`` (or, more commonly, the
        ``@dace.registry.autoregister_params`` class decorator) with two
        optional boolean keyword arguments: ``singlestate`` (default: False)
        and ``coarsening`` (default: False).
        If ``singlestate`` is True, the transformation is matched on
        subgraphs inside an SDFGState; otherwise, subgraphs of the SDFG
        state machine are matched.
        If ``coarsening`` is True, this transformation will be performed
        automatically as part of SDFG dataflow coarsening.
    """

    # Properties
    sdfg_id = Property(dtype=int, category="(Debug)")
    state_id = Property(dtype=int, category="(Debug)")
    _subgraph = DictProperty(key_type=int, value_type=int, category="(Debug)")
    expr_index = Property(dtype=int, category="(Debug)")

    def annotates_memlets(self) -> bool:
        """ Indicates whether the transformation annotates the edges it
            creates or modifies with the appropriate memlets. This determines
            whether to apply memlet propagation after the transformation.
        """
        return False

    def expressions(self) -> List[gr.SubgraphView]:
        """ Returns a list of Graph objects that will be matched in the
            subgraph isomorphism phase. Used as a pre-pass before calling
            `can_be_applied`.
            :see: Transformation.can_be_applied
        """
        raise NotImplementedError

    def can_be_applied(self,
                       graph: Union[SDFG, SDFGState],
                       candidate: Dict['PatternNode', int],
                       expr_index: int,
                       sdfg: SDFG,
                       permissive: bool = False) -> bool:
        """ Returns True if this transformation can be applied on the
            candidate matched subgraph.
            :param graph: SDFGState object if this Transformation is
                          single-state, or SDFG object otherwise.
            :param candidate: A mapping between node IDs returned from
                              `Transformation.expressions` and the nodes in
                              `graph`.
            :param expr_index: The list index from
                               `Transformation.expressions` that was matched.
            :param sdfg: If `graph` is an SDFGState, its parent SDFG.
                         Otherwise should be equal to `graph`.
            :param permissive: Whether transformation should run in
                               permissive mode.
            :return: True if the transformation can be applied.
        """
        raise NotImplementedError

    def apply(self, sdfg: SDFG) -> Union[Any, None]:
        """ Applies this transformation instance on the matched pattern
            graph.
            :param sdfg: The SDFG to apply the transformation to.
            :return: A transformation-defined return value, which could be
                     used to pass analysis data out, or nothing.
""" raise NotImplementedError def match_to_str(self, graph: Union[SDFG, SDFGState], candidate: Dict['PatternNode', int]) -> str: """ Returns a string representation of the pattern match on the candidate subgraph. Used when identifying matches in the console UI. """ return str(list(candidate.values())) def __init__(self, sdfg_id: int, state_id: int, subgraph: Dict['PatternNode', int], expr_index: int, override: bool = False, options: Optional[Dict[str, Any]] = None) -> None: """ Initializes an instance of Transformation match. :param sdfg_id: A unique ID of the SDFG. :param state_id: The node ID of the SDFG state, if applicable. If transformation does not operate on a single state, the value should be -1. :param subgraph: A mapping between node IDs returned from `Transformation.expressions` and the nodes in `graph`. :param expr_index: The list index from `Transformation.expressions` that was matched. :param override: If True, accepts the subgraph dictionary as-is (mostly for internal use). :param options: An optional dictionary of transformation properties :raise TypeError: When transformation is not subclass of Transformation. :raise TypeError: When state_id is not instance of int. :raise TypeError: When subgraph is not a dict of PatternNode : int. """ self.sdfg_id = sdfg_id self.state_id = state_id if not override: expr = self.expressions()[expr_index] for value in subgraph.values(): if not isinstance(value, int): raise TypeError('All values of ' 'subgraph' ' dictionary must be ' 'instances of int.') self._subgraph = {expr.node_id(k): v for k, v in subgraph.items()} else: self._subgraph = {-1: -1} # Serializable subgraph with node IDs as keys self._subgraph_user = copy.copy(subgraph) self.expr_index = expr_index # Ease-of-use API: Set new pattern-nodes with information about this # instance. for pname, pval in self._get_pattern_nodes().items(): # Create new pattern node from existing field new_pnode = PatternNode( pval.node if isinstance(pval, PatternNode) else type(pval)) new_pnode.match_instance = self # Append existing values in subgraph dictionary if pval in self._subgraph_user: self._subgraph_user[new_pnode] = self._subgraph_user[pval] # Override static field with the new node in this instance only setattr(self, pname, new_pnode) # Set properties if options is not None: for optname, optval in options.items(): setattr(self, optname, optval) @property def subgraph(self): return self._subgraph_user def apply_pattern(self, sdfg: SDFG, append: bool = True, annotate: bool = True) -> Union[Any, None]: """ Applies this transformation on the given SDFG, using the transformation instance to find the right SDFG object (based on SDFG ID), and applying memlet propagation as necessary. :param sdfg: The SDFG (or an SDFG in the same hierarchy) to apply the transformation to. :param append: If True, appends the transformation to the SDFG transformation history. :return: A transformation-defined return value, which could be used to pass analysis data out, or nothing. """ if append: sdfg.append_transformation(self) tsdfg: SDFG = sdfg.sdfg_list[self.sdfg_id] retval = self.apply(tsdfg) if annotate and not self.annotates_memlets(): propagation.propagate_memlets_sdfg(tsdfg) return retval def __lt__(self, other: 'Transformation') -> bool: """ Comparing two transformations by their class name and node IDs in match. Used for ordering transformations consistently. 
""" if type(self) != type(other): return type(self).__name__ < type(other).__name__ self_ids = iter(self.subgraph.values()) other_ids = iter(self.subgraph.values()) try: self_id = next(self_ids) except StopIteration: return True try: other_id = next(other_ids) except StopIteration: return False self_end = False while self_id is not None and other_id is not None: if self_id != other_id: return self_id < other_id try: self_id = next(self_ids) except StopIteration: self_end = True try: other_id = next(other_ids) except StopIteration: if self_end: # Transformations are equal return False return False if self_end: return True @classmethod def _get_pattern_nodes(cls) -> Dict[str, 'PatternNode']: """ Returns a dictionary of pattern-matching node in this transformation subclass. Used internally for pattern-matching. :return: A dictionary mapping between pattern-node name and its type. """ return { k: getattr(cls, k) for k in dir(cls) if isinstance(getattr(cls, k), PatternNode) or (k.startswith( '_') and isinstance(getattr(cls, k), (nd.Node, SDFGState))) } @classmethod def apply_to(cls, sdfg: SDFG, options: Optional[Dict[str, Any]] = None, expr_index: int = 0, verify: bool = True, annotate: bool = True, permissive: bool = False, save: bool = True, **where: Union[nd.Node, SDFGState]): """ Applies this transformation to a given subgraph, defined by a set of nodes. Raises an error if arguments are invalid or transformation is not applicable. The subgraph is defined by the `where` dictionary, where each key is taken from the `PatternNode` fields of the transformation. For example, applying `MapCollapse` on two maps can pe performed as follows: ``` MapCollapse.apply_to(sdfg, outer_map_entry=map_a, inner_map_entry=map_b) ``` :param sdfg: The SDFG to apply the transformation to. :param options: A set of parameters to use for applying the transformation. :param expr_index: The pattern expression index to try to match with. :param verify: Check that `can_be_applied` returns True before applying. :param annotate: Run memlet propagation after application if necessary. :param permissive: Apply transformation in permissive mode. :param save: Save transformation as part of the SDFG file. Set to False if composing transformations. :param where: A dictionary of node names (from the transformation) to nodes in the SDFG or a single state. 
""" if len(where) == 0: raise ValueError('At least one node is required') options = options or {} # Check that all keyword arguments are nodes and if interstate or not sample_node = next(iter(where.values())) if isinstance(sample_node, SDFGState): graph = sdfg state_id = -1 elif isinstance(sample_node, nd.Node): graph = next(s for s in sdfg.nodes() if sample_node in s.nodes()) state_id = sdfg.node_id(graph) else: raise TypeError('Invalid node type "%s"' % type(sample_node).__name__) # Check that all nodes in the pattern are set required_nodes = cls.expressions()[expr_index].nodes() required_node_names = { pname: pval for pname, pval in cls._get_pattern_nodes().items() if pval in required_nodes } required = set(required_node_names.keys()) intersection = required & set(where.keys()) if len(required - intersection) > 0: raise ValueError('Missing nodes for transformation subgraph: %s' % (required - intersection)) # Construct subgraph and instantiate transformation subgraph = { required_node_names[k]: graph.node_id(where[k]) for k in required } instance = cls(sdfg.sdfg_id, state_id, subgraph, expr_index) # Construct transformation parameters for optname, optval in options.items(): if not optname in cls.__properties__: raise ValueError('Property "%s" not found in transformation' % optname) setattr(instance, optname, optval) if verify: if not instance.can_be_applied( graph, subgraph, expr_index, sdfg, permissive=permissive): raise ValueError('Transformation cannot be applied on the ' 'given subgraph ("can_be_applied" failed)') # Apply to SDFG return instance.apply_pattern(sdfg, annotate=annotate, append=save) def __str__(self) -> str: return type(self).__name__ def print_match(self, sdfg: SDFG) -> str: """ Returns a string representation of the pattern match on the given SDFG. Used for printing matches in the console UI. """ if not isinstance(sdfg, SDFG): raise TypeError("Expected SDFG, got: {}".format( type(sdfg).__name__)) if self.state_id == -1: graph = sdfg else: graph = sdfg.nodes()[self.state_id] string = type(self).__name__ + ' in ' string += self.match_to_str(graph, self.subgraph) return string def to_json(self, parent=None) -> Dict[str, Any]: props = serialize.all_properties_to_json(self) return { 'type': 'Transformation', 'transformation': type(self).__name__, **props } @staticmethod def from_json(json_obj: Dict[str, Any], context: Dict[str, Any] = None) -> 'Transformation': xform = next(ext for ext in Transformation.extensions().keys() if ext.__name__ == json_obj['transformation']) # Recreate subgraph expr = xform.expressions()[json_obj['expr_index']] subgraph = { expr.node(int(k)): int(v) for k, v in json_obj['_subgraph'].items() } # Reconstruct transformation ret = xform(json_obj['sdfg_id'], json_obj['state_id'], subgraph, json_obj['expr_index']) context = context or {} context['transformation'] = ret serialize.set_properties_from_json( ret, json_obj, context=context, ignore_properties={'transformation', 'type'}) return ret
class Array(Data): """ Array/constant descriptor (dimensions, type and other properties). """ # Properties allow_conflicts = Property( dtype=bool, default=False, desc='If enabled, allows more than one ' 'memlet to write to the same memory location without conflict ' 'resolution.') strides = ShapeProperty( # element_type=symbolic.pystr_to_symbolic, desc='For each dimension, the number of elements to ' 'skip in order to obtain the next element in ' 'that dimension.') total_size = SymbolicProperty( default=1, desc='The total allocated size of the array. Can be used for' ' padding.') offset = ListProperty(element_type=symbolic.pystr_to_symbolic, desc='Initial offset to translate all indices by.') may_alias = Property(dtype=bool, default=False, desc='This pointer may alias with other pointers in ' 'the same function') alignment = Property(dtype=int, default=0, desc='Allocation alignment in bytes (0 uses ' 'compiler-default)') def __init__(self, dtype, shape, transient=False, allow_conflicts=False, storage=dtypes.StorageType.Default, location=None, strides=None, offset=None, may_alias=False, lifetime=dtypes.AllocationLifetime.Scope, alignment=0, debuginfo=None, total_size=None): super(Array, self).__init__(dtype, shape, transient, storage, location, lifetime, debuginfo) if shape is None: raise IndexError('Shape must not be None') self.allow_conflicts = allow_conflicts self.may_alias = may_alias self.alignment = alignment if strides is not None: self.strides = cp.copy(strides) else: self.strides = [_prod(shape[i + 1:]) for i in range(len(shape))] self.total_size = total_size or _prod(shape) if offset is not None: self.offset = cp.copy(offset) else: self.offset = [0] * len(shape) self.validate() def __repr__(self): return 'Array (dtype=%s, shape=%s)' % (self.dtype, self.shape) def clone(self): return Array(self.dtype, self.shape, self.transient, self.allow_conflicts, self.storage, self.location, self.strides, self.offset, self.may_alias, self.lifetime, self.alignment, self.debuginfo, self.total_size) def to_json(self): attrs = serialize.all_properties_to_json(self) # Take care of symbolic expressions attrs['strides'] = list(map(str, attrs['strides'])) retdict = {"type": type(self).__name__, "attributes": attrs} return retdict @staticmethod def from_json(json_obj, context=None): if json_obj['type'] != "Array": raise TypeError("Invalid data type") # Create dummy object ret = Array(dtypes.int8, ()) serialize.set_properties_from_json(ret, json_obj, context=context) # TODO: This needs to be reworked (i.e. 
integrated into the list property) ret.strides = list(map(symbolic.pystr_to_symbolic, ret.strides)) # Check validity now ret.validate() return ret def validate(self): super(Array, self).validate() if len(self.strides) != len(self.shape): raise TypeError('Strides must be the same size as shape') if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol, symbolic.sympy.Basic)) for s in self.strides): raise TypeError('Strides must be a list or tuple of integer ' 'values or symbols') if len(self.offset) != len(self.shape): raise TypeError('Offset must be the same size as shape') def covers_range(self, rng): if len(rng) != len(self.shape): return False for s, (rb, re, rs) in zip(self.shape, rng): # Shape has to be positive if isinstance(s, sp.Basic): olds = s if 'positive' in s.assumptions0: s = sp.Symbol(str(s), **s.assumptions0) else: s = sp.Symbol(str(s), positive=True, **s.assumptions0) if isinstance(rb, sp.Basic): rb = rb.subs({olds: s}) if isinstance(re, sp.Basic): re = re.subs({olds: s}) if isinstance(rs, sp.Basic): rs = rs.subs({olds: s}) try: if rb < 0: # Negative offset return False except TypeError: # cannot determine truth value of Relational pass #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (rb > 0), # 'If this expression is false, please refine symbol definitions in the program.') try: if re > s: # Beyond shape return False except TypeError: # cannot determine truth value of Relational pass #print('WARNING: Cannot evaluate relational expression %s, assuming true.' % (re < s), # 'If this expression is false, please refine symbol definitions in the program.') return True # Checks for equivalent shape and type def is_equivalent(self, other): if not isinstance(other, Array): return False # Test type if self.dtype != other.dtype: return False # Test dimensionality if len(self.shape) != len(other.shape): return False # Test shape for dim, otherdim in zip(self.shape, other.shape): # Any other case (constant vs. constant), check for equality if otherdim != dim: return False return True def as_arg(self, with_types=True, for_call=False, name=None): arrname = name if not with_types or for_call: return arrname if self.may_alias: return str(self.dtype.ctype) + ' *' + arrname return str(self.dtype.ctype) + ' * __restrict__ ' + arrname def sizes(self): return [ d.name if isinstance(d, symbolic.symbol) else str(d) for d in self.shape ] @property def free_symbols(self): result = super().free_symbols for s in self.strides: if isinstance(s, sp.Expr): result |= set(s.free_symbols) if isinstance(self.total_size, sp.Expr): result |= set(self.total_size.free_symbols) for o in self.offset: if isinstance(o, sp.Expr): result |= set(o.free_symbols) return result
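# Construction sketch for the descriptor above: strides, total size and
# offsets are derived from the shape when not given explicitly (the symbol N
# is illustrative):
#
#     N = symbolic.symbol('N')
#     desc = Array(dtypes.float32, [N, N], transient=True)
#     # desc.strides == [N, 1] (row-major), desc.total_size == N*N,
#     # desc.offset == [0, 0]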
class OrthogonalTiling(pattern_matching.Transformation): """ Implements the orthogonal tiling transformation. Orthogonal tiling is a type of nested map fission that creates tiles in every dimension of the matched Map. """ _map_entry = nodes.MapEntry(nodes.Map("", [], [])) # Properties prefix = Property(dtype=str, default="tile", desc="Prefix for new iterators") tile_sizes = ShapeProperty(dtype=tuple, default=(128, 128, 128), desc="Tile size per dimension") divides_evenly = Property(dtype=bool, default=False, desc="Tile size divides dimension length evenly") @staticmethod def annotates_memlets(): return False @staticmethod def expressions(): return [nxutil.node_path_graph(OrthogonalTiling._map_entry)] @staticmethod def can_be_applied(graph, candidate, expr_index, sdfg, strict=False): return True @staticmethod def match_to_str(graph, candidate): map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]] return map_entry.map.label + ': ' + str(map_entry.map.params) def apply(self, sdfg): graph = sdfg.nodes()[self.state_id] # Tile map. target_dim, new_dim, new_map = self.__stripmine( sdfg, graph, self.subgraph) return new_map def __stripmine(self, sdfg, graph, candidate): # Retrieve map entry and exit nodes. map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]] map_exit = graph.exit_nodes(map_entry)[0] # Map subgraph map_subgraph = graph.scope_subgraph(map_entry) # Retrieve transformation properties. prefix = self.prefix tile_sizes = self.tile_sizes divides_evenly = self.divides_evenly new_param = [] new_range = [] for dim_idx in range(len(map_entry.map.params)): if dim_idx >= len(tile_sizes): tile_size = tile_sizes[-1] else: tile_size = tile_sizes[dim_idx] # Retrieve parameter and range of dimension to be strip-mined. target_dim = map_entry.map.params[dim_idx] td_from, td_to, td_step = map_entry.map.range[dim_idx] new_dim = prefix + '_' + target_dim # Basic values if divides_evenly: tile_num = '(%s + 1 - %s) / %s' % (symbolic.symstr(td_to), symbolic.symstr(td_from), str(tile_size)) else: tile_num = 'int_ceil((%s + 1 - %s), %s)' % (symbolic.symstr( td_to), symbolic.symstr(td_from), str(tile_size)) # Outer map values (over all tiles) nd_from = 0 nd_to = symbolic.pystr_to_symbolic(str(tile_num) + ' - 1') nd_step = 1 # Inner map values (over one tile) td_from_new = dace.symbolic.pystr_to_symbolic(td_from) td_to_new_exact = symbolic.pystr_to_symbolic( 'min(%s + 1 - %s * %s, %s + %s) - 1' % (symbolic.symstr(td_to), str(new_dim), str(tile_size), td_from_new, str(tile_size))) td_to_new_approx = symbolic.pystr_to_symbolic( '%s + %s - 1' % (td_from_new, str(tile_size))) # Outer map (over all tiles) new_dim_range = (nd_from, nd_to, nd_step) new_param.append(new_dim) new_range.append(new_dim_range) # Inner map (over one tile) if divides_evenly: td_to_new = td_to_new_approx else: td_to_new = dace.symbolic.SymExpr(td_to_new_exact, td_to_new_approx) map_entry.map.range[dim_idx] = (td_from_new, td_to_new, td_step) # Fix subgraph memlets target_dim = dace.symbolic.pystr_to_symbolic(target_dim) offset = dace.symbolic.pystr_to_symbolic('%s * %s' % (new_dim, str(tile_size))) for _, _, _, _, memlet in map_subgraph.edges(): old_subset = memlet.subset if isinstance(old_subset, dace.subsets.Indices): new_indices = [] for idx in old_subset: new_idx = idx.subs(target_dim, target_dim + offset) new_indices.append(new_idx) memlet.subset = dace.subsets.Indices(new_indices) elif isinstance(old_subset, dace.subsets.Range): new_ranges = [] for i, old_range in enumerate(old_subset): if len(old_range) == 3: 
                        b, e, s = old_range
                        t = old_subset.tile_sizes[i]
                    else:
                        raise ValueError('Range %s is invalid.' % old_range)
                    new_b = b.subs(target_dim, target_dim + offset)
                    new_e = e.subs(target_dim, target_dim + offset)
                    new_s = s.subs(target_dim, target_dim + offset)
                    new_t = t.subs(target_dim, target_dim + offset)
                    new_ranges.append((new_b, new_e, new_s, new_t))
                memlet.subset = dace.subsets.Range(new_ranges)
            else:
                raise NotImplementedError

        new_map = nodes.Map(prefix + '_' + map_entry.map.label, new_param,
                            subsets.Range(new_range))
        new_map_entry = nodes.MapEntry(new_map)
        new_exit = nodes.MapExit(new_map)

        # Make internal map's schedule to "not parallel"
        map_entry.map._schedule = types.ScheduleType.Default

        # Redirect/create edges.
        new_in_edges = {}
        for _src, conn, _dest, _, memlet in graph.out_edges(map_entry):
            if not isinstance(sdfg.arrays[memlet.data], dace.data.Scalar):
                new_subset = copy.deepcopy(memlet.subset)
                # new_subset = calc_set_image(map_entry.map.params,
                #                             map_entry.map.range,
                #                             memlet.subset, cont_or_strided)
                if memlet.data in new_in_edges:
                    src, src_conn, dest, dest_conn, new_memlet, num = \
                        new_in_edges[memlet.data]
                    new_memlet.subset = calc_set_union(
                        new_memlet.data, sdfg.arrays[new_memlet.data],
                        new_memlet.subset, new_subset)
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges.update({
                        memlet.data: (src, src_conn, dest, dest_conn,
                                      new_memlet, min(num, int(conn[4:])))
                    })
                else:
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_in_edges.update({
                        memlet.data: (new_map_entry, None, map_entry, None,
                                      new_memlet, int(conn[4:]))
                    })
        nxutil.change_edge_dest(graph, map_entry, new_map_entry)

        new_out_edges = {}
        for _src, conn, _dest, _, memlet in graph.in_edges(map_exit):
            if not isinstance(sdfg.arrays[memlet.data], dace.data.Scalar):
                new_subset = memlet.subset
                # new_subset = calc_set_image(map_entry.map.params,
                #                             map_entry.map.range,
                #                             memlet.subset, cont_or_strided)
                if memlet.data in new_out_edges:
                    src, src_conn, dest, dest_conn, new_memlet, num = \
                        new_out_edges[memlet.data]
                    new_memlet.subset = calc_set_union(
                        new_memlet.data, sdfg.arrays[new_memlet.data],
                        new_memlet.subset, new_subset)
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges.update({
                        memlet.data: (src, src_conn, dest, dest_conn,
                                      new_memlet, min(num, int(conn[4:])))
                    })
                else:
                    new_memlet = dcpy(memlet)
                    new_memlet.subset = new_subset
                    new_memlet.num_accesses = new_memlet.num_elements()
                    new_out_edges.update({
                        memlet.data: (map_exit, None, new_exit, None,
                                      new_memlet, int(conn[4:]))
                    })
        nxutil.change_edge_src(graph, map_exit, new_exit)

        # Connector related work follows
        # 1. Dictionary 'old_connector_number': 'new_connector_number'
        # 2. New node in/out connectors
        # 3.
New edges in_conn_nums = [] for _, e in new_in_edges.items(): _, _, _, _, _, num = e in_conn_nums.append(num) in_conn = {} for i, num in enumerate(in_conn_nums): in_conn.update({num: i + 1}) entry_in_connectors = set() entry_out_connectors = set() for i in range(len(in_conn_nums)): entry_in_connectors.add('IN_' + str(i + 1)) entry_out_connectors.add('OUT_' + str(i + 1)) new_map_entry.in_connectors = entry_in_connectors new_map_entry.out_connectors = entry_out_connectors for _, e in new_in_edges.items(): src, _, dst, _, memlet, num = e graph.add_edge(src, 'OUT_' + str(in_conn[num]), dst, 'IN_' + str(in_conn[num]), memlet) out_conn_nums = [] for _, e in new_out_edges.items(): _, _, dst, _, _, num = e if dst is not new_exit: continue out_conn_nums.append(num) out_conn = {} for i, num in enumerate(out_conn_nums): out_conn.update({num: i + 1}) exit_in_connectors = set() exit_out_connectors = set() for i in range(len(out_conn_nums)): exit_in_connectors.add('IN_' + str(i + 1)) exit_out_connectors.add('OUT_' + str(i + 1)) new_exit.in_connectors = exit_in_connectors new_exit.out_connectors = exit_out_connectors for _, e in new_out_edges.items(): src, _, dst, _, memlet, num = e graph.add_edge(src, 'OUT_' + str(out_conn[num]), dst, 'IN_' + str(out_conn[num]), memlet) # Return strip-mined dimension. return target_dim, new_dim, new_map @staticmethod def __modify_edges(sdfg, graph, candidate, target_dim, new_dim): map_entry = graph.nodes()[candidate[OrthogonalTiling._map_entry]] processed = [] for src, _dest, memlet, _scope in nxutil.traverse_sdfg_scope( graph, map_entry, True): if memlet in processed: continue processed.append(memlet) # Corner cases if isinstance(sdfg.arrays[memlet.data], dace.data.Stream): continue if memlet.wcr is not None: memlet.num_accesses = 1 continue for i, dim in enumerate(memlet.subset): if isinstance(dim, tuple): dim = tuple( symbolic.pystr_to_symbolic(d).subs( symbolic.pystr_to_symbolic(target_dim), symbolic.pystr_to_symbolic('%s + %s' % (str(new_dim), str(target_dim)))) for d in dim) else: dim = symbolic.pystr_to_symbolic(dim).subs( symbolic.pystr_to_symbolic(target_dim), symbolic.pystr_to_symbolic( '%s + %s' % (str(new_dim), str(target_dim)))) memlet.subset[i] = dim return
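# Usage sketch (assuming this transformation is registered and that the
# SDFG's standard ``apply_transformations`` API is available): tile every
# matched map with 64x64 tiles. The option names correspond to the
# properties declared above.
#
#     sdfg.apply_transformations(OrthogonalTiling,
#                                options={'tile_sizes': (64, 64),
#                                         'divides_evenly': True})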
class Data(object): """ Data type descriptors that can be used as references to memory. Examples: Arrays, Streams, custom arrays (e.g., sparse matrices). """ dtype = TypeClassProperty(default=dtypes.int32) shape = ShapeProperty(default=[]) transient = Property(dtype=bool, default=False) storage = Property(dtype=dtypes.StorageType, desc="Storage location", choices=dtypes.StorageType, default=dtypes.StorageType.Default, from_string=lambda x: dtypes.StorageType[x]) lifetime = Property(dtype=dtypes.AllocationLifetime, desc='Data allocation span', choices=dtypes.AllocationLifetime, default=dtypes.AllocationLifetime.Scope, from_string=lambda x: dtypes.AllocationLifetime[x]) location = DictProperty( key_type=str, value_type=symbolic.pystr_to_symbolic, desc='Full storage location identifier (e.g., rank, GPU ID)') debuginfo = DebugInfoProperty(allow_none=True) def __init__(self, dtype, shape, transient, storage, location, lifetime, debuginfo): self.dtype = dtype self.shape = shape self.transient = transient self.storage = storage self.location = location if location is not None else {} self.lifetime = lifetime self.debuginfo = debuginfo self._validate() def validate(self): """ Validate the correctness of this object. Raises an exception on error. """ self._validate() # Validation of this class is in a separate function, so that this # class can call `_validate()` without calling the subclasses' # `validate` function. def _validate(self): if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol, symbolic.sympy.Basic)) for s in self.shape): raise TypeError('Shape must be a list or tuple of integer values ' 'or symbols') return True def to_json(self): attrs = serialize.all_properties_to_json(self) retdict = {"type": type(self).__name__, "attributes": attrs} return retdict @property def toplevel(self): return self.lifetime is not dtypes.AllocationLifetime.Scope def copy(self): raise RuntimeError( 'Data descriptors are unique and should not be copied') def is_equivalent(self, other): """ Check for equivalence (shape and type) of two data descriptors. """ raise NotImplementedError def as_arg(self, with_types=True, for_call=False, name=None): """Returns a string for a C++ function signature (e.g., `int *A`). """ raise NotImplementedError @property def free_symbols(self) -> Set[symbolic.SymbolicType]: """ Returns a set of undefined symbols in this data descriptor. """ result = set() for s in self.shape: if isinstance(s, sp.Basic): result |= set(s.free_symbols) return result def __repr__(self): return 'Abstract Data Container, DO NOT USE' @property def veclen(self): return self.dtype.veclen if hasattr(self.dtype, "veclen") else 1
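# Behavior sketch: symbolic sizes surface through ``free_symbols``, which the
# framework can use to collect the undefined symbols of a descriptor (the
# symbol N and the Array subclass are used here for illustration):
#
#     N = symbolic.symbol('N')
#     desc = Array(dtypes.float64, [N, 2])
#     # N appears in desc.free_symbols (via shape, strides and total_size)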
class LibraryNode(CodeNode):

    name = Property(dtype=str, desc="Name of node")
    implementation = LibraryImplementationProperty(
        dtype=str,
        allow_none=True,
        desc=("Which implementation this library node will expand into. "
              "Must match a key in the list of possible implementations."))
    schedule = Property(
        dtype=dtypes.ScheduleType,
        desc="If set, determines the default device mapping of "
        "the node upon expansion, if expanded to a nested SDFG.",
        choices=dtypes.ScheduleType,
        from_string=lambda x: dtypes.ScheduleType[x],
        default=dtypes.ScheduleType.Default)
    debuginfo = DebugInfoProperty()

    def __init__(self, name, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.name = name
        self.label = name

    # Overrides subclasses to return LibraryNode as their JSON type
    @property
    def __jsontype__(self):
        return 'LibraryNode'

    # Based on https://stackoverflow.com/a/2020083/6489142
    def _fullclassname(self):
        module = self.__class__.__module__
        if module is None or module == str.__class__.__module__:
            return self.__class__.__name__  # Avoid reporting __builtin__
        else:
            return module + '.' + self.__class__.__name__

    def to_json(self, parent):
        jsonobj = super().to_json(parent)
        jsonobj['classpath'] = self._fullclassname()
        return jsonobj

    @classmethod
    def from_json(cls, json_obj, context=None):
        if cls == LibraryNode:
            clazz = pydoc.locate(json_obj['classpath'])
            if clazz is None:
                raise TypeError('Unrecognized library node type "%s"' %
                                json_obj['classpath'])
            return clazz.from_json(json_obj, context)
        else:  # Subclasses are actual library nodes
            ret = cls(json_obj['attributes']['name'])
            dace.serialize.set_properties_from_json(ret,
                                                    json_obj,
                                                    context=context)
            return ret

    def expand(self, sdfg, state, *args, **kwargs) -> str:
        """ Create and perform the expansion transformation for this library
            node.
            :return: the name of the expanded implementation
        """
        implementation = self.implementation
        library_name = getattr(type(self), '_dace_library_name', '')
        try:
            if library_name:
                config_implementation = Config.get("library", library_name,
                                                   "default_implementation")
            else:
                config_implementation = None
        except KeyError:
            # Non-standard libraries are not defined in the config schema,
            # and thus might not exist in the config.
config_implementation = None if config_implementation is not None: try: config_override = Config.get("library", library_name, "override") if config_override and implementation in self.implementations: if implementation is not None: warnings.warn( "Overriding explicitly specified " "implementation {} for {} with {}.".format( implementation, self.label, config_implementation)) implementation = config_implementation except KeyError: config_override = False # If not explicitly set, try the node default if implementation is None: implementation = type(self).default_implementation # If no node default, try library default if implementation is None: import dace.library # Avoid cyclic dependency lib = dace.library._DACE_REGISTERED_LIBRARIES[type( self)._dace_library_name] implementation = lib.default_implementation # Try the default specified in the config if implementation is None: implementation = config_implementation # Otherwise we don't know how to expand if implementation is None: raise ValueError("No implementation or default " "implementation specified.") if implementation not in self.implementations.keys(): raise KeyError("Unknown implementation for node {}: {}".format( type(self).__name__, implementation)) transformation_type = type(self).implementations[implementation] sdfg_id = sdfg.sdfg_id state_id = sdfg.nodes().index(state) subgraph = {transformation_type._match_node: state.node_id(self)} transformation = transformation_type(sdfg_id, state_id, subgraph, 0) transformation.apply(sdfg, *args, **kwargs) return implementation @classmethod def register_implementation(cls, name, transformation_type): """Register an implementation to belong to this library node type.""" cls.implementations[name] = transformation_type transformation_type._match_node = cls
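# Registration sketch (names hypothetical): a concrete library node declares
# an ``implementations`` dictionary, each entry backed by an expansion
# transformation, and is expanded in-place through ``expand``:
#
#     class MyGemm(LibraryNode):
#         implementations = {}
#         default_implementation = None
#
#     MyGemm.register_implementation('pure', ExpandMyGemmPure)
#
#     node = MyGemm('gemm')
#     node.implementation = 'pure'
#     node.expand(sdfg, state)  # replaces the node with its implementation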
class SubgraphFusion(transformation.SubgraphTransformation): """ Implements the SubgraphFusion transformation. Fuses together the maps contained in the subgraph and pushes inner nodes into a global outer map, creating transients and new connections where necessary. SubgraphFusion requires all lowest scope level maps in the subgraph to have the same indices and parameter range in every dimension. This can be achieved using the MultiExpansion transformation first. Reductions can also be expanded using ReduceExpansion as a preprocessing step. """ debug = Property(desc="Show debug info", dtype=bool, default=False) transient_allocation = Property( desc="Storage Location to push transients to that are " "fully contained within the subgraph.", dtype=dtypes.StorageType, default=dtypes.StorageType.Default) @staticmethod def can_be_applied(sdfg: SDFG, subgraph: SubgraphView) -> bool: ''' Fusible if 1. Maps have the same access sets and ranges in order 2. Any nodes in between two maps are AccessNodes only, without WCR There is at most one AccessNode only on a path between two maps, no other nodes are allowed 3. The exiting memlets' subsets to an intermediate edge must cover the respective incoming memlets' subset into the next map ''' # get graph graph = subgraph.graph for node in subgraph.nodes(): if node not in graph.nodes(): return False # next, get all the maps map_entries = helpers.get_highest_scope_maps(sdfg, graph, subgraph) map_exits = [graph.exit_node(map_entry) for map_entry in map_entries] maps = [map_entry.map for map_entry in map_entries] # 1. check whether all map ranges and indices are the same if len(maps) <= 1: return False base_map = maps[0] for map in maps: if map.get_param_num() != base_map.get_param_num(): return False if not all( [p1 == p2 for (p1, p2) in zip(map.params, base_map.params)]): return False if not map.range == base_map.range: return False # 1.1 check whether all map entries have the same schedule schedule = map_entries[0].schedule if not all([entry.schedule == schedule for entry in map_entries]): return False # 2. 
check intermediate feasibility
        # see map_fusion.py for similar checks
        # we are being more relaxed here

        # 2.1 do some preparation work first:
        # calculate all out_nodes and intermediate_nodes
        # definition see in apply()
        intermediate_nodes = set()
        out_nodes = set()
        for map_entry, map_exit in zip(map_entries, map_exits):
            for edge in graph.out_edges(map_exit):
                current_node = edge.dst
                if len(graph.out_edges(current_node)) == 0:
                    out_nodes.add(current_node)
                else:
                    for dst_edge in graph.out_edges(current_node):
                        if dst_edge.dst in map_entries:
                            intermediate_nodes.add(current_node)
                        else:
                            out_nodes.add(current_node)

        # 2.2 topological feasibility:
        # For each intermediate and out node: must never reach any map
        # entry if it is not connected to map entry immediately
        visited = set()

        # for memoization purposes
        def visit_descendants(graph, node, visited, map_entries):
            # if we have already been at this node
            if node in visited:
                return True
            # not necessary to add if there aren't any other in connections
            if len(graph.in_edges(node)) > 1:
                visited.add(node)
            for oedge in graph.out_edges(node):
                if not visit_descendants(graph, oedge.dst, visited,
                                         map_entries):
                    return False
            return True

        for node in intermediate_nodes | out_nodes:
            # these nodes must not lead to a map entry
            nodes_to_check = set()
            for oedge in graph.out_edges(node):
                if oedge.dst not in map_entries:
                    nodes_to_check.add(oedge.dst)

            for forbidden_node in nodes_to_check:
                if not visit_descendants(graph, forbidden_node, visited,
                                         map_entries):
                    return False

        # 2.3 memlet feasibility
        # For each intermediate node, look at whether inner adjacent
        # memlets of the exiting map cover inner adjacent memlets
        # of the next entering map.
        # We also check for any WCRs on the fly.
        for node in intermediate_nodes:
            upper_subsets = set()
            lower_subsets = set()
            # First, determine which dimensions of the memlet ranges
            # change with the map, we do not need to care about the other
            # dimensions.
            total_dims = len(sdfg.data(node.data).shape)
            dims_to_discard = SubgraphFusion.get_invariant_dimensions(
                sdfg, graph, map_entries, map_exits, node)

            # find upper_subsets
            for in_edge in graph.in_edges(node):
                # first check for WCRs
                if in_edge.data.wcr:
                    return False
                if in_edge.src in map_exits:
                    edge = graph.memlet_path(in_edge)[-2]
                    subset_to_add = dcpy(edge.data.subset
                                         if edge.data.data == node.data
                                         else edge.data.other_subset)
                    subset_to_add.pop(dims_to_discard)
                    upper_subsets.add(subset_to_add)
                else:
                    raise NotImplementedError(
                        "Nodes between two maps to be fused with *incoming* "
                        "edges from outside the maps are not allowed yet.")

            # find lower_subsets
            for out_edge in graph.out_edges(node):
                if out_edge.dst in map_entries:
                    # cannot use memlet tree here as there could be
                    # not just one map succeeding. Do it manually
                    for oedge in graph.out_edges(out_edge.dst):
                        if oedge.src_conn[3:] == out_edge.dst_conn[2:]:
                            subset_to_add = dcpy(
                                oedge.data.subset
                                if oedge.data.data == node.data
                                else oedge.data.other_subset)
                            subset_to_add.pop(dims_to_discard)
                            lower_subsets.add(subset_to_add)

            upper_iter = iter(upper_subsets)
            union_upper = next(upper_iter)

            # TODO: add this check at a later point
            # We assume that upper_subsets for each data array
            # are contiguous
            # or do the full check if possible (intersection needed)
            '''
            # check whether subsets in upper_subsets are adjacent.
            # this is a requirement for the current implementation
            #try:
            # O(n^2*|dims|) but very small amount of subsets anyway
            try:
                for dim in range(total_dims - len(dims_to_discard)):
                    ordered_list = [(-1, -1, -1)]
                    for upper_subset in upper_subsets:
                        lo = upper_subset[dim][0]
                        hi = upper_subset[dim][1]
                        for idx, element in enumerate(ordered_list):
                            if element[0] <= lo and element[1] >= hi:
                                break
                            if element[0] > lo:
                                ordered_list.insert(idx, (lo, hi))
                    ordered_list.pop(0)

                    highest = ordered_list[0][1]
                    for i in range(len(ordered_list)):
                        if i < len(ordered_list) - 1:
                            current_range = ordered_list[i]
                            if current_range[1] > highest:
                                highest = current_range[1]
                            next_range = ordered_list[i + 1]
                            if highest < next_range[0] - 1:
                                return False
            except TypeError:
                #return False
            '''
            # FORNOW: just omit warning if unsure
            for lower_subset in lower_subsets:
                covers = False
                for upper_subset in upper_subsets:
                    if upper_subset.covers(lower_subset):
                        covers = True
                        break
                if not covers:
                    warnings.warn(
                        f"WARNING: For node {node}, please check that "
                        "incoming memlets cover outgoing ones. "
                        "Ambiguous check (WIP).")

            # now take union of upper subsets
            for subs in upper_iter:
                union_upper = subsets.union(union_upper, subs)
                if not union_upper:
                    # something went wrong using union -- we'd rather abort
                    return False

            # finally check coverage
            for lower_subset in lower_subsets:
                if not union_upper.covers(lower_subset):
                    return False

        return True

    @staticmethod
    def get_invariant_dimensions(sdfg, graph, map_entries, map_exits, node):
        ''' On a non-fused graph, returns a set of indices that correspond
            to array dimensions that do not change when we are entering maps
            for an access node.
        '''
        variate_dimensions = set()
        subset_length = -1

        for in_edge in graph.in_edges(node):
            if in_edge.src in map_exits:
                other_edge = graph.memlet_path(in_edge)[-2]
                other_subset = other_edge.data.subset \
                    if other_edge.data.data == node.data \
                    else other_edge.data.other_subset
                for (idx, (ssbs1, ssbs2)) \
                        in enumerate(zip(in_edge.data.subset, other_subset)):
                    if ssbs1 != ssbs2:
                        variate_dimensions.add(idx)
            else:
                raise NotImplementedError(
                    "Nodes between two maps to be fused with *incoming* "
                    "edges from outside the maps are not allowed yet.")

            if subset_length < 0:
                subset_length = other_subset.dims()
            else:
                assert other_subset.dims() == subset_length

        for out_edge in graph.out_edges(node):
            if out_edge.dst in map_entries:
                for other_edge in graph.out_edges(out_edge.dst):
                    if other_edge.src_conn[3:] == out_edge.dst_conn[2:]:
                        other_subset = other_edge.data.subset \
                            if other_edge.data.data == node.data \
                            else other_edge.data.other_subset
                        for (idx, (ssbs1, ssbs2)) in enumerate(
                                zip(out_edge.data.subset, other_subset)):
                            if ssbs1 != ssbs2:
                                variate_dimensions.add(idx)
                        assert other_subset.dims() == subset_length

        invariant_dimensions = set(
            [i for i in range(subset_length)]) - variate_dimensions
        return invariant_dimensions

    def redirect_edge(self,
                      graph,
                      edge,
                      new_src=None,
                      new_src_conn=None,
                      new_dst=None,
                      new_dst_conn=None,
                      new_data=None):

        data = new_data if new_data else edge.data
        if new_src:
            ret = graph.add_edge(new_src, new_src_conn, edge.dst,
                                 edge.dst_conn, data)
            graph.remove_edge(edge)
        if new_dst:
            ret = graph.add_edge(edge.src, edge.src_conn, new_dst,
                                 new_dst_conn, data)
            graph.remove_edge(edge)

        return ret

    def prepare_intermediate_nodes(self,
                                   sdfg,
                                   graph,
                                   in_nodes,
                                   out_nodes,
                                   intermediate_nodes,
                                   map_entries,
                                   map_exits,
                                   do_not_override=[]):
        ''' For every intermediate node, determines whether it is fully
            contained in the subgraph and whether it has any out connections
            and thus transients need to be
            created
        '''
        def redirect(redirect_node, original_node):
            # redirect all outgoing traffic which
            # does not enter fusion scope again
            # from original_node to redirect_node
            # and then create a path from original_node to redirect_node.
            edges = list(graph.out_edges(original_node))
            for edge in edges:
                if edge.dst not in map_entries:
                    self.redirect_edge(graph, edge, new_src=redirect_node)

            graph.add_edge(original_node, None, redirect_node, None, Memlet())

        # first search whether intermediate_nodes appear outside of subgraph
        # and store it in dict
        data_counter = defaultdict(int)
        data_counter_subgraph = defaultdict(int)

        data_intermediate = set([node.data for node in intermediate_nodes])

        # do a full global search and count each data from each intermediate
        # node
        scope_dict = graph.scope_dict()
        for state in sdfg.nodes():
            for node in state.nodes():
                if isinstance(node, nodes.AccessNode) \
                        and node.data in data_intermediate:
                    # add them to the counter set in all cases
                    data_counter[node.data] += 1
                    # see whether we are inside the subgraph scope
                    # if so, add to data_counter_subgraph
                    # DO NOT add if it is in out_nodes
                    if state == graph and \
                            (node in intermediate_nodes
                             or scope_dict[node] in map_entries):
                        data_counter_subgraph[node.data] += 1

        # next up: If intermediate_counter and global counter match and if
        # the array is declared transient, it is fully contained by the
        # subgraph
        subgraph_contains_data = {
            data: data_counter[data] == data_counter_subgraph[data]
            and sdfg.data(data).transient and data not in do_not_override
            for data in data_intermediate
        }

        transients_created = {}
        for node in intermediate_nodes & out_nodes:
            # create new transient at exit replacing the array
            # and redirect all traffic
            data_ref = sdfg.data(node.data)
            out_trans_data_name = node.data + '_OUT'
            data_trans = sdfg.add_transient(name=out_trans_data_name,
                                            shape=dcpy(data_ref.shape),
                                            dtype=dcpy(data_ref.dtype),
                                            storage=dcpy(data_ref.storage),
                                            offset=dcpy(data_ref.offset))
            node_trans = graph.add_access(out_trans_data_name)
            if node.setzero:
                node_trans.setzero = True
            redirect(node_trans, node)
            transients_created[node] = node_trans

        # finally, create dict for every array for which
        # subgraph_contains_data is true that lists invariant axes.
        invariant_dimensions = {}
        for node in intermediate_nodes:
            if subgraph_contains_data[node.data]:
                # only need to check in this case
                # else the array doesn't get modified and we don't
                # need invariant dimensions
                data = node.data
                inv_dims = SubgraphFusion.get_invariant_dimensions(
                    sdfg, graph, map_entries, map_exits, node)
                if data in invariant_dimensions:
                    # do a check -- we want the same result for each
                    # node containing the same data
                    if not inv_dims == invariant_dimensions[data]:
                        warnings.warn(
                            "WARNING: Data dimensions that are not "
                            "propagated through differ across multiple "
                            f"instances of access nodes for data {node.data}. "
                            "Please check whether all memlets to AccessNodes "
                            "containing this data are sound.")
                    invariant_dimensions[data] |= inv_dims
                else:
                    invariant_dimensions[data] = inv_dims

        return (subgraph_contains_data, transients_created,
                invariant_dimensions)

    def apply(self, sdfg, do_not_override=None, **kwargs):
        subgraph = self.subgraph_view(sdfg)
        graph = subgraph.graph

        map_entries = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        self.fuse(sdfg, graph, map_entries, do_not_override, **kwargs)

    def fuse(self, sdfg, graph, map_entries, do_not_override=None, **kwargs):
        """ Takes the map_entries specified and tries to fuse maps.
            all maps have to be extended into outer and inner map
            (use MapExpansion as a pre-pass)

            Arrays that don't exist outside the subgraph get pushed
            into the map and their data dimension gets cropped.
            Otherwise the original array is taken.

            For every output, respective connections are created
            automatically.

            :param sdfg: SDFG
            :param graph: State
            :param map_entries: Map Entries (class MapEntry) of the outer
                                maps which we want to fuse
            :param do_not_override: List of data names whose corresponding
                                    nodes are fully contained within the
                                    subgraph but should not be
                                    augmented/transformed nevertheless.
        """

        # if there are no maps, return immediately
        if len(map_entries) == 0:
            return

        do_not_override = do_not_override or []

        # get maps and map exits
        maps = [map_entry.map for map_entry in map_entries]
        map_exits = [graph.exit_node(map_entry) for map_entry in map_entries]

        # Nodes that flow into one or several maps but no data is flowed to
        # them from any map
        in_nodes = set()

        # Nodes into which data is flowed but that no data flows into any
        # map from them
        out_nodes = set()

        # Nodes that act as intermediate node - data flows from a map into
        # them and then there is an outgoing path into another map
        intermediate_nodes = set()

        ### NOTE:
        # - in_nodes, out_nodes, intermediate_nodes refer to the
        #   configuration of the final fused map
        # - in_nodes and out_nodes are trivially disjoint
        # - Intermediate_nodes and out_nodes are not necessarily disjoint
        # - Intermediate_nodes and in_nodes are disjoint by design.
        #   There could be a node that has both incoming edges from a map
        #   exit and from outside, but it is just treated as
        #   intermediate_node and handled automatically.

        for map_entry, map_exit in zip(map_entries, map_exits):
            for edge in graph.in_edges(map_entry):
                in_nodes.add(edge.src)
            for edge in graph.out_edges(map_exit):
                current_node = edge.dst
                if len(graph.out_edges(current_node)) == 0:
                    out_nodes.add(current_node)
                else:
                    for dst_edge in graph.out_edges(current_node):
                        if dst_edge.dst in map_entries:
                            # add to intermediate_nodes
                            intermediate_nodes.add(current_node)
                        else:
                            # add to out_nodes
                            out_nodes.add(current_node)
                    for e in graph.in_edges(current_node):
                        if e.src not in map_exits:
                            raise NotImplementedError(
                                "Nodes between two maps to be fused with "
                                "*incoming* edges from outside the maps are "
                                "not allowed yet.")

        # any intermediate_nodes currently in in_nodes shouldn't be there
        in_nodes -= intermediate_nodes

        if self.debug:
            print("SubgraphFusion::In_nodes", in_nodes)
            print("SubgraphFusion::Out_nodes", out_nodes)
            print("SubgraphFusion::Intermediate_nodes", intermediate_nodes)

        # all maps are assumed to have the same params and range in order
        global_map = nodes.Map(label="outer_fused",
                               params=maps[0].params,
                               ndrange=maps[0].range)
        global_map_entry = nodes.MapEntry(global_map)
        global_map_exit = nodes.MapExit(global_map)

        schedule = map_entries[0].schedule
        global_map_entry.schedule = schedule
        graph.add_node(global_map_entry)
        graph.add_node(global_map_exit)

        # next up, for any intermediate node, find whether it only appears
        # in the subgraph or also somewhere else / as an input
        # create new transients for nodes that are in out_nodes and
        # intermediate_nodes simultaneously
        # also check which dimensions of each transient data element
        # correspond to map axes and write this information into a dict.
node_info = self.prepare_intermediate_nodes(sdfg, graph, in_nodes, out_nodes, \ intermediate_nodes,\ map_entries, map_exits, \ do_not_override) (subgraph_contains_data, transients_created, invariant_dimensions) = node_info if self.debug: print( "SubgraphFusion:: {Intermediate_node: subgraph_contains_data} dict" ) print(subgraph_contains_data) inconnectors_dict = {} # Dict for saving incoming nodes and their assigned connectors # Format: {access_node: (edge, in_conn, out_conn)} for map_entry, map_exit in zip(map_entries, map_exits): # handle inputs # TODO: dynamic map range -- this is fairly unrealistic in such a setting for edge in graph.in_edges(map_entry): src = edge.src mmt = graph.memlet_tree(edge) out_edges = [child.edge for child in mmt.root().children] if src in in_nodes: in_conn = None out_conn = None if src in inconnectors_dict: # no need to augment subset of outer edge. # will do this at the end in one pass. in_conn = inconnectors_dict[src][1] out_conn = inconnectors_dict[src][2] graph.remove_edge(edge) else: next_conn = global_map_entry.next_connector() in_conn = 'IN_' + next_conn out_conn = 'OUT_' + next_conn global_map_entry.add_in_connector(in_conn) global_map_entry.add_out_connector(out_conn) inconnectors_dict[src] = (edge, in_conn, out_conn) # reroute in edge via global_map_entry self.redirect_edge(graph, edge, new_dst = global_map_entry, \ new_dst_conn = in_conn) # map out edges to new map for out_edge in out_edges: self.redirect_edge(graph, out_edge, new_src = global_map_entry, \ new_src_conn = out_conn) else: # connect directly for out_edge in out_edges: mm = dcpy(out_edge.data) self.redirect_edge(graph, out_edge, new_src=src, new_data=mm) graph.remove_edge(edge) for edge in graph.out_edges(map_entry): # special case: for nodes that have no data connections if not edge.src_conn: self.redirect_edge(graph, edge, new_src=global_map_entry) ###################################### for edge in graph.in_edges(map_exit): if not edge.dst_conn: # no destination connector, path ends here. self.redirect_edge(graph, edge, new_dst=global_map_exit) continue # find corresponding out_edges for current edge, cannot use mmt anymore out_edges = [ oedge for oedge in graph.out_edges(map_exit) if oedge.src_conn[3:] == edge.dst_conn[2:] ] # Tuple to store in/out connector port that might be created port_created = None for out_edge in out_edges: dst = out_edge.dst if dst in intermediate_nodes & out_nodes: # create connection through global map from # dst to dst_transient that was created dst_transient = transients_created[dst] next_conn = global_map_exit.next_connector() in_conn = 'IN_' + next_conn out_conn = 'OUT_' + next_conn global_map_exit.add_in_connector(in_conn) global_map_exit.add_out_connector(out_conn) inner_memlet = dcpy(edge.data) inner_memlet.other_subset = dcpy(edge.data.subset) e_inner = graph.add_edge(dst, None, global_map_exit, in_conn, inner_memlet) mm_outer = propagate_memlet(graph, inner_memlet, global_map_entry, \ union_inner_edges = False) e_outer = graph.add_edge(global_map_exit, out_conn, dst_transient, None, mm_outer) # remove edge from dst to dst_transient that was created # in intermediate preparation. 
                    removed = False
                    for e in graph.out_edges(dst):
                        if e.dst == dst_transient:
                            graph.remove_edge(e)
                            removed = True
                            break

                    if self.debug:
                        assert removed == True

                # handle separately: intermediate_nodes and pure out nodes
                # case 1: intermediate_nodes: can just redirect edge
                if dst in intermediate_nodes:
                    self.redirect_edge(graph,
                                       out_edge,
                                       new_src=edge.src,
                                       new_src_conn=edge.src_conn,
                                       new_data=dcpy(edge.data))

                # case 2: pure out node: connect to outer array node
                if dst in (out_nodes - intermediate_nodes):
                    if edge.dst != global_map_exit:
                        next_conn = global_map_exit.next_connector()
                        in_conn = 'IN_' + next_conn
                        out_conn = 'OUT_' + next_conn
                        global_map_exit.add_in_connector(in_conn)
                        global_map_exit.add_out_connector(out_conn)
                        self.redirect_edge(graph,
                                           edge,
                                           new_dst=global_map_exit,
                                           new_dst_conn=in_conn)
                        port_created = (in_conn, out_conn)
                        #edge.dst = global_map_exit
                        #edge.dst_conn = in_conn
                    else:
                        conn_nr = edge.dst_conn[3:]
                        in_conn = port_created[0]
                        out_conn = port_created[1]

                    # map
                    graph.add_edge(global_map_exit, out_conn, dst, None,
                                   dcpy(out_edge.data))
                    graph.remove_edge(out_edge)

            # remove the edge if it has not been used by any pure out node
            if not port_created:
                graph.remove_edge(edge)

        # maps are now ready to be discarded
        graph.remove_node(map_entry)
        graph.remove_node(map_exit)

    # end main loop.

        # create a mapping from data arrays to offsets
        # for later memlet adjustments
        min_offsets = dict()

        # do one pass to augment all transient arrays
        data_intermediate = set([node.data for node in intermediate_nodes])
        for data_name in data_intermediate:
            if subgraph_contains_data[data_name]:
                all_nodes = [
                    n for n in intermediate_nodes if n.data == data_name
                ]
                in_edges = list(chain(*(graph.in_edges(n)
                                        for n in all_nodes)))

                in_edges_iter = iter(in_edges)
                in_edge = next(in_edges_iter)
                target_subset = dcpy(in_edge.data.subset)
                target_subset.pop(invariant_dimensions[data_name])
                ######
                while True:
                    try:  # executed if there are multiple in_edges
                        in_edge = next(in_edges_iter)
                        target_subset_curr = dcpy(in_edge.data.subset)
                        target_subset_curr.pop(
                            invariant_dimensions[data_name])
                        target_subset = subsets.union(target_subset,
                                                      target_subset_curr)
                    except StopIteration:
                        break

                min_offsets_cropped = target_subset.min_element_approx()
                # calculate the new transient array size.
                target_subset.offset(min_offsets_cropped, True)

                # re-add invariant dimensions with offset 0 and save to
                # min_offsets
                min_offset = []
                index = 0
                for i in range(len(sdfg.data(data_name).shape)):
                    if i in invariant_dimensions[data_name]:
                        min_offset.append(0)
                    else:
                        min_offset.append(min_offsets_cropped[index])
                        index += 1

                min_offsets[data_name] = min_offset

                # determine the shape of the new array.
                new_data_shape = []
                index = 0
                for i, sz in enumerate(sdfg.data(data_name).shape):
                    if i in invariant_dimensions[data_name]:
                        new_data_shape.append(sz)
                    else:
                        new_data_shape.append(target_subset.size()[index])
                        index += 1

                new_data_strides = [
                    data._prod(new_data_shape[i + 1:])
                    for i in range(len(new_data_shape))
                ]

                new_data_totalsize = data._prod(new_data_shape)
                new_data_offset = [0] * len(new_data_shape)

                # augment.
                transient_to_transform = sdfg.data(data_name)
                transient_to_transform.shape = new_data_shape
                transient_to_transform.strides = new_data_strides
                transient_to_transform.total_size = new_data_totalsize
                transient_to_transform.offset = new_data_offset
                transient_to_transform.lifetime = \
                    dtypes.AllocationLifetime.Scope
                transient_to_transform.storage = self.transient_allocation

            else:
                # don't modify data container - array is needed outside
                # of subgraph.
                # hack: set lifetime to State if allocation has only been
                # Scope so far, to avoid allocation issues
                if sdfg.data(data_name).lifetime == \
                        dtypes.AllocationLifetime.Scope:
                    sdfg.data(data_name).lifetime = \
                        dtypes.AllocationLifetime.State

        # do one pass to adjust the memlets of in-between transients
        for node in intermediate_nodes:
            # all incoming edges to node
            in_edges = graph.in_edges(node)
            # outgoing edges going to another fused part
            inter_edges = []
            # outgoing edges that exit global map
            out_edges = []
            for e in graph.out_edges(node):
                if e.dst == global_map_exit:
                    out_edges.append(e)
                else:
                    inter_edges.append(e)

            # offset memlets where necessary
            if subgraph_contains_data[node.data]:
                # get min_offset
                min_offset = min_offsets[node.data]
                # re-add invariant dimensions with offset 0
                for iedge in in_edges:
                    for edge in graph.memlet_tree(iedge):
                        if edge.data.data == node.data:
                            edge.data.subset.offset(min_offset, True)
                        elif edge.data.other_subset:
                            edge.data.other_subset.offset(min_offset, True)
                for cedge in inter_edges:
                    for edge in graph.memlet_tree(cedge):
                        if edge.data.data == node.data:
                            edge.data.subset.offset(min_offset, True)
                        elif edge.data.other_subset:
                            edge.data.other_subset.offset(min_offset, True)

                # if in_edges has several entries:
                # put other_subset into out_edges for correctness
                if len(in_edges) > 1:
                    for oedge in out_edges:
                        oedge.data.other_subset = dcpy(oedge.data.subset)
                        oedge.data.other_subset.offset(min_offset, True)

            # also correct memlets of created transient
            if node in transients_created:
                transient_in_edges = graph.in_edges(transients_created[node])
                transient_out_edges = graph.out_edges(
                    transients_created[node])
                for edge in chain(transient_in_edges, transient_out_edges):
                    for e in graph.memlet_tree(edge):
                        if e.data.data == node.data:
                            e.data.data += '_OUT'

        # do one last pass to correct outside memlets adjacent to global map
        for out_connector in global_map_entry.out_connectors:
            # find corresponding in_connector
            # and the in-connecting edge
            in_connector = 'IN' + out_connector[3:]
            for iedge in graph.in_edges(global_map_entry):
                if iedge.dst_conn == in_connector:
                    in_edge = iedge

            # find corresponding out_connector
            # and all out-connecting edges that belong to it
            # count them
            oedge_counter = 0
            for oedge in graph.out_edges(global_map_entry):
                if oedge.src_conn == out_connector:
                    out_edge = oedge
                    oedge_counter += 1

            # do memlet propagation
            # if there are several out edges, else there is no need
            if oedge_counter > 1:
                memlet_out = propagate_memlet(dfg_state=graph,
                                              memlet=out_edge.data,
                                              scope_node=global_map_entry,
                                              union_inner_edges=True)
                # override number of accesses
                in_edge.data.volume = memlet_out.volume
                in_edge.data.subset = memlet_out.subset

        # create a hook for outside access to global_map
        self._global_map_entry = global_map_entry
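# Usage sketch (assuming the SubgraphTransformation API that this class
# extends): fuse all lowest-scope maps of a state. If map ranges or
# parameters differ, run MultiExpansion/ReduceExpansion first, as noted in
# the class docstring; ``sdfg`` and ``state`` are assumed to exist.
#
#     subgraph = SubgraphView(state, state.nodes())
#     if SubgraphFusion.can_be_applied(sdfg, subgraph):
#         fusion = SubgraphFusion(subgraph)
#         fusion.apply(sdfg)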
class Tasklet(CodeNode):
    """ A node that contains a tasklet: a functional computation procedure
        that can only access external data specified using connectors.

        Tasklets may be implemented in Python, C++, or any other language
        supported by the code generator.
    """

    code = CodeProperty(desc="Tasklet code", default=CodeBlock(""))
    debuginfo = DebugInfoProperty()
    instrument = Property(
        choices=dtypes.InstrumentationType,
        desc="Measure execution statistics with given method",
        default=dtypes.InstrumentationType.No_Instrumentation)

    def __init__(self,
                 label,
                 inputs=None,
                 outputs=None,
                 code="",
                 language=dtypes.Language.Python,
                 location=None,
                 debuginfo=None):
        super(Tasklet, self).__init__(label, location, inputs, outputs)
        self.code = CodeBlock(code, language)
        self.debuginfo = debuginfo

    @property
    def language(self):
        return self.code.language

    @staticmethod
    def from_json(json_obj, context=None):
        ret = Tasklet("dummylabel")
        dace.serialize.set_properties_from_json(ret, json_obj, context=context)
        return ret

    @property
    def name(self):
        return self._label

    def validate(self, sdfg, state):
        if not dtypes.validate_name(self.label):
            raise NameError('Invalid tasklet name "%s"' % self.label)
        for in_conn in self.in_connectors:
            if not dtypes.validate_name(in_conn):
                raise NameError('Invalid input connector "%s"' % in_conn)
        for out_conn in self.out_connectors:
            if not dtypes.validate_name(out_conn):
                raise NameError('Invalid output connector "%s"' % out_conn)

    @property
    def free_symbols(self) -> Set[str]:
        return self.code.get_free_symbols(self.in_connectors.keys()
                                          | self.out_connectors.keys())

    def infer_connector_types(self, sdfg, state):
        # If a Python tasklet, use type inference to figure out all None
        # output connectors
        if all(cval.type is not None for cval in self.out_connectors.values()):
            return
        if self.code.language != dtypes.Language.Python:
            return

        if any(cval.type is None for cval in self.in_connectors.values()):
            raise TypeError('Cannot infer output connectors of tasklet "%s", '
                            'not all input connectors have types' % str(self))

        # Avoid import loop
        from dace.codegen.tools.type_inference import infer_types

        # Get symbols defined at beginning of node, and infer all types in
        # tasklet
        syms = state.symbols_defined_at(self)
        syms.update(self.in_connectors)
        new_syms = infer_types(self.code.code, syms)
        for cname, oconn in self.out_connectors.items():
            if oconn.type is None:
                if cname not in new_syms:
                    raise TypeError('Cannot infer type of tasklet %s output '
                                    '"%s", please specify manually.' %
                                    (self.label, cname))
                self.out_connectors[cname] = new_syms[cname]

    def __str__(self):
        if not self.label:
            return "--Empty--"
        else:
            return self.label
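# Hedged construction sketch: a minimal Python tasklet with two input
# connectors and one output connector. The connector names ('a', 'b', 'c')
# are illustrative; the code string refers to them directly, as described in
# the class docstring above.
def _example_tasklet():
    """ Hypothetical example: build a tasklet computing c = a + b. """
    t = Tasklet('add', inputs={'a', 'b'}, outputs={'c'}, code='c = a + b')
    assert t.language == dtypes.Language.Python
    return t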
class Array(Data):
    """ Array/constant descriptor (dimensions, type and other properties). """

    # Properties
    allow_conflicts = Property(dtype=bool)
    # TODO: Should we use a Code property here?
    materialize_func = Property(dtype=str,
                                allow_none=True,
                                setter=set_materialize_func)
    access_order = Property(dtype=tuple)
    strides = Property(dtype=list)
    offset = Property(dtype=list)
    may_alias = Property(dtype=bool,
                         default=False,
                         desc='This pointer may alias with other pointers in '
                         'the same function')

    def __init__(self,
                 dtype,
                 shape,
                 materialize_func=None,
                 transient=False,
                 allow_conflicts=False,
                 storage=dace.types.StorageType.Default,
                 location='',
                 access_order=None,
                 strides=None,
                 offset=None,
                 may_alias=False,
                 toplevel=False,
                 debuginfo=None):
        super(Array, self).__init__(dtype, shape, transient, storage, location,
                                    toplevel, debuginfo)

        if shape is None:
            raise IndexError('Shape must not be None')

        self.allow_conflicts = allow_conflicts
        self.materialize_func = materialize_func
        self.may_alias = may_alias

        if access_order is not None:
            self.access_order = cp.copy(access_order)
        else:
            self.access_order = tuple(i for i in range(len(shape)))

        if strides is not None:
            self.strides = cp.copy(strides)
        else:
            self.strides = cp.copy(list(shape))

        if offset is not None:
            self.offset = cp.copy(offset)
        else:
            self.offset = [0] * len(shape)

        self.validate()

    def __repr__(self):
        return 'Array (dtype=%s, shape=%s)' % (self.dtype, self.shape)

    def clone(self):
        return Array(self.dtype, self.shape, self.materialize_func,
                     self.transient, self.allow_conflicts, self.storage,
                     self.location, self.access_order, self.strides,
                     self.offset, self.may_alias, self.toplevel,
                     self.debuginfo)

    def validate(self):
        super(Array, self).validate()
        if len(self.access_order) != len(self.shape):
            raise TypeError('Access order must be the same size as shape')

        if len(self.strides) != len(self.shape):
            raise TypeError('Strides must be the same size as shape')

        if any(not isinstance(s, (int, symbolic.SymExpr, symbolic.symbol,
                                  symbolic.sympy.Basic))
               for s in self.strides):
            raise TypeError('Strides must be a list or tuple of integer '
                            'values or symbols')

        if len(self.offset) != len(self.shape):
            raise TypeError('Offset must be the same size as shape')

    def covers_range(self, rng):
        if len(rng) != len(self.shape):
            return False

        for s, (rb, re, rs) in zip(self.shape, rng):
            # Shape has to be positive
            if isinstance(s, sympy.Basic):
                olds = s
                if 'positive' in s.assumptions0:
                    s = sympy.Symbol(str(s), **s.assumptions0)
                else:
                    s = sympy.Symbol(str(s), positive=True, **s.assumptions0)
                if isinstance(rb, sympy.Basic):
                    rb = rb.subs({olds: s})
                if isinstance(re, sympy.Basic):
                    re = re.subs({olds: s})
                if isinstance(rs, sympy.Basic):
                    rs = rs.subs({olds: s})

            try:
                if rb < 0:  # Negative offset
                    return False
            except TypeError:
                # Cannot determine the truth value of the relational
                # expression; assume it holds. If it does not, refine the
                # symbol definitions in the program.
                pass

            try:
                if re > s:  # Beyond shape
                    return False
            except TypeError:
                # Cannot determine the truth value of the relational
                # expression; assume it holds (see above).
                pass

        return True

    # Checks for equivalent shape and type
    def is_equivalent(self, other):
        if not isinstance(other, Array):
            return False

        # Test type
        if self.dtype != other.dtype:
            return False

        # Test dimensionality
        if len(self.shape) != len(other.shape):
            return False

        # Test shape
        for dim, otherdim in zip(self.shape, other.shape):
            # If both are symbols, ensure equality
            if symbolic.issymbolic(dim) and symbolic.issymbolic(otherdim):
                if dim != otherdim:
                    return False

            # If one is a symbol and the other is a constant,
            # make sure they are equivalent
            elif symbolic.issymbolic(otherdim):
                if symbolic.eval(otherdim) != dim:
                    return False
            elif symbolic.issymbolic(dim):
                if symbolic.eval(dim) != otherdim:
                    return False
            else:
                # Any other case (constant vs. constant), check for equality
                if otherdim != dim:
                    return False
        return True

    def signature(self, with_types=True, for_call=False, name=None):
        arrname = name

        if self.materialize_func is not None:
            if for_call:
                return 'nullptr'
            if not with_types:
                return arrname
            arrname = '/* ' + arrname + ' (immaterial) */'

        if not with_types or for_call:
            return arrname
        if self.may_alias:
            return str(self.dtype.ctype) + ' *' + arrname
        return str(self.dtype.ctype) + ' * __restrict__ ' + arrname

    def sizes(self):
        return [
            d.name if isinstance(d, symbolic.symbol) else str(d)
            for d in self.shape
        ]
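# Hedged construction sketch: a two-dimensional transient array descriptor
# with symbolic shape. The symbol names M and N are illustrative; defaults
# are filled in as in __init__ above (access_order becomes (0, 1), offset
# [0, 0], and strides are copied from the shape).
def _example_array_descriptor():
    """ Hypothetical example: create an M-by-N transient array. """
    M = symbolic.symbol('M')
    N = symbolic.symbol('N')
    desc = Array(dace.float64, shape=(M, N), transient=True)
    assert desc.access_order == (0, 1)
    return desc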
class Map(object):
    """ A Map is a two-node representation of parametric graphs, containing
        an integer set by which the contents (nodes dominated by an entry
        node and post-dominated by an exit node) are replicated.

        Maps contain a `schedule` property, which specifies how the scope
        should be scheduled (execution order). Code generators can use the
        schedule property to generate appropriate code, e.g., GPU kernels.
    """

    # List of (editable) properties
    label = Property(dtype=str, desc="Label of the map")
    params = ListProperty(element_type=str, desc="Mapped parameters")
    range = RangeProperty(desc="Ranges of map parameters",
                          default=sbs.Range([]))
    schedule = Property(dtype=dtypes.ScheduleType,
                        desc="Map schedule",
                        choices=dtypes.ScheduleType,
                        from_string=lambda x: dtypes.ScheduleType[x],
                        default=dtypes.ScheduleType.Default)
    unroll = Property(dtype=bool, desc="Map unrolling")
    collapse = Property(dtype=int,
                        default=1,
                        desc="How many dimensions to collapse into the "
                        "parallel range")
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)
    instrument = Property(
        choices=dtypes.InstrumentationType,
        desc="Measure execution statistics with given method",
        default=dtypes.InstrumentationType.No_Instrumentation)

    def __init__(self,
                 label,
                 params,
                 ndrange,
                 schedule=dtypes.ScheduleType.Default,
                 unroll=False,
                 collapse=1,
                 fence_instrumentation=False,
                 debuginfo=None):
        super(Map, self).__init__()

        # Assign properties
        self.label = label
        self.schedule = schedule
        self.unroll = unroll
        self.collapse = collapse
        self.params = params
        self.range = ndrange
        self.debuginfo = debuginfo
        self._fence_instrumentation = fence_instrumentation

    def __str__(self):
        return self.label + "[" + ", ".join([
            "{}={}".format(i, r)
            for i, r in zip(self._params,
                            [sbs.Range.dim_to_string(d) for d in self._range])
        ]) + "]"

    def validate(self, sdfg, state, node):
        if not dtypes.validate_name(self.label):
            raise NameError('Invalid map name "%s"' % self.label)

    def get_param_num(self):
        """ Returns the number of map dimension parameters/symbols. """
        return len(self.params)
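# Hedged construction sketch: a two-dimensional map over 0..N-1 x 0..M-1.
# The range string syntax via sbs.Range.from_string is an assumption based on
# the property declarations above; parameter names and bounds are
# illustrative.
def _example_map():
    """ Hypothetical example: build a sequential two-dimensional map. """
    m = Map(label='compute',
            params=['i', 'j'],
            ndrange=sbs.Range.from_string('0:N, 0:M'),
            schedule=dtypes.ScheduleType.Sequential)
    assert m.get_param_num() == 2
    return m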
class MultiExpansion(transformation.SubgraphTransformation):
    """ Implements the MultiExpansion transformation.
        Takes the outermost-scope maps of a given subgraph and splits each of
        them into an outer and an inner map, where the outer map contains the
        ranges common to all maps and the inner map contains the remaining
        ones. Map access variables and memlets are changed accordingly.
    """

    debug = Property(dtype=bool, desc="Debug Mode", default=False)
    sequential_innermaps = Property(dtype=bool,
                                    desc="Make all inner maps that are "
                                    "created during expansion sequential",
                                    default=False)

    @staticmethod
    def can_be_applied(sdfg: SDFG, subgraph: SubgraphView) -> bool:
        # get the outermost-scope maps of the subgraph:
        # first, grab the graph and check that all nodes of the subgraph
        # belong to it (or to nested SDFGs therein)
        graph = subgraph.graph
        for node in subgraph.nodes():
            if node not in graph.nodes():
                return False

        # next, get all the maps and their common base ranges
        maps = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        brng = helpers.common_map_base_ranges(maps)

        # fail if at most one map is found
        if len(maps) <= 1:
            return False

        # fail if the maps share no common parameters
        if len(brng) == 0:
            return False

        return True

    def apply(self, sdfg, map_base_variables=None):
        # get the outermost-scope map entries and expand them
        subgraph = self.subgraph_view(sdfg)
        graph = subgraph.graph

        maps = helpers.get_highest_scope_maps(sdfg, graph, subgraph)
        self.expand(sdfg, graph, maps, map_base_variables=map_base_variables)

    def expand(self, sdfg, graph, map_entries, map_base_variables=None):
        """ Expansion into outer and inner maps for each map in a specified
            set. The resulting outer maps all have the same range and
            indices; corresponding variables and memlets are changed
            accordingly. The inner maps contain the leftover dimensions.

            :param sdfg: Underlying SDFG
            :param graph: Graph in which we expand
            :param map_entries: List of map entries (of type MapEntry) that
                                we want to expand
            :param map_base_variables: Optional list of strings.
                                       If None, expand() searches for the
                                       maximal set of equal map ranges and
                                       pushes those and their corresponding
                                       loop variables into the outer map.
                                       If specified, expand() pushes the
                                       ranges belonging to the given loop
                                       iteration variables into the outer
                                       map (for instance,
                                       map_base_variables = ['i', 'j']
                                       assumes that all maps have common
                                       iteration indices i and j with
                                       matching ranges).
        """
        maps = [entry.map for entry in map_entries]

        if not map_base_variables:
            # find the maximal subset of variables to expand;
            # greedy if there exist multiple ranges that are equal in a map
            map_base_ranges = helpers.common_map_base_ranges(maps)
            reassignments = helpers.find_reassignment(maps, map_base_ranges)

            # first, regroup and reassign:
            # define the outer iteration variable names by taking the first
            # map and its indices at the common ranges
            map_base_variables = []
            for rng in map_base_ranges:
                for i in range(len(maps[0].params)):
                    if maps[0].range[i] == rng and \
                       maps[0].params[i] not in map_base_variables:
                        map_base_variables.append(maps[0].params[i])
                        break

            params_dict = {}
            if self.debug:
                print("MultiExpansion::Map_base_variables:",
                      map_base_variables)
                print("MultiExpansion::Map_base_ranges:", map_base_ranges)

            # create a parameter dictionary for every map
            for map in maps:
                # begin with the identity assignment
                params_dict_map = {param: param for param in map.params}

                # for every reassignment entry that is not -1, the parameter
                # has to be renamed to the corresponding entry of
                # map_base_variables. If the target name already appears in
                # the dictionary, do a swap; otherwise just replace it.
                for i, reassignment in enumerate(reassignments[map]):
                    if reassignment == -1:
                        # nothing to do
                        continue

                    current_var = map.params[i]
                    current_assignment = params_dict_map[current_var]
                    target_assignment = map_base_variables[reassignment]
                    if current_assignment != target_assignment:
                        if target_assignment in params_dict_map.values():
                            # do a swap
                            key1 = current_var
                            for key, value in params_dict_map.items():
                                if value == target_assignment:
                                    key2 = key

                            value1 = params_dict_map[key1]
                            value2 = params_dict_map[key2]
                            params_dict_map[key1] = value2
                            params_dict_map[key2] = value1
                        else:
                            # just reassign
                            params_dict_map[current_var] = target_assignment

                # done; assign params_dict_map to the global dictionary
                params_dict[map] = params_dict_map

            for map, map_entry in zip(maps, map_entries):
                map_scope = graph.scope_subgraph(map_entry)
                params_dict_map = params_dict[map]
                # rename in two phases through a temporary name, so that
                # swapped variables do not overwrite one another
                for firstp, secondp in params_dict_map.items():
                    if firstp != secondp:
                        replace(map_scope, firstp, '__' + firstp + '_fused')
                for firstp, secondp in params_dict_map.items():
                    if firstp != secondp:
                        replace(map_scope, '__' + firstp + '_fused', secondp)

                # now also replace the map variables inside the maps
                for i in range(len(map.params)):
                    map.params[i] = params_dict_map[map.params[i]]

            if self.debug:
                print("MultiExpansion::Params replaced")

        else:
            # map_base_variables were specified; just calculate
            # map_base_ranges and check that all maps are consistent
            map_base_ranges = []

            map0 = maps[0]
            for var in map_base_variables:
                index = map0.params.index(var)
                map_base_ranges.append(map0.range[index])

            for map in maps:
                for var, rng in zip(map_base_variables, map_base_ranges):
                    assert map.range[map.params.index(var)] == rng

        # then expand all the maps
        for map, map_entry in zip(maps, map_entries):
            if map.get_param_num() == len(map_base_variables):
                # nothing to expand, continue
                continue

            map_exit = graph.exit_node(map_entry)

            # create two new maps, outer and inner
            params_outer = map_base_variables
            ranges_outer = map_base_ranges

            init_params_inner = []
            init_ranges_inner = []
            for param, rng in zip(map.params, map.range):
                if param in map_base_variables:
                    continue
                else:
                    init_params_inner.append(param)
                    init_ranges_inner.append(rng)

            params_inner = init_params_inner
            ranges_inner = subsets.Range(init_ranges_inner)
            inner_map = nodes.Map(label=map.label + '_inner',
                                  params=params_inner,
                                  ndrange=ranges_inner,
                                  schedule=dtypes.ScheduleType.Sequential
                                  if self.sequential_innermaps else
                                  dtypes.ScheduleType.Default)

            map.label = map.label + '_outer'
            map.params = params_outer
            map.range = ranges_outer

            # create new map entries and exits
            map_entry_inner = nodes.MapEntry(inner_map)
            map_exit_inner = nodes.MapExit(inner_map)

            # analogously to MapExpansion, redirect edges through the new
            # inner map
            for edge in graph.out_edges(map_entry):
                graph.remove_edge(edge)
                graph.add_memlet_path(map_entry,
                                      map_entry_inner,
                                      edge.dst,
                                      src_conn=edge.src_conn,
                                      memlet=edge.data,
                                      dst_conn=edge.dst_conn)

            dynamic_edges = dynamic_map_inputs(graph, map_entry)
            for edge in dynamic_edges:
                # remove old edge and connector
                graph.remove_edge(edge)
                edge.dst._in_connectors.remove(edge.dst_conn)

                # propagate to each range it belongs to
                path = []
                for mapnode in [map_entry, map_entry_inner]:
                    path.append(mapnode)
                    # check whether the connector appears as a symbol in the
                    # range (a list comprehension is used here because the
                    # builtin `map` is shadowed by the loop variable)
                    if any(edge.dst_conn in
                           [str(s) for s in symbolic.symlist(r)]
                           for r in mapnode.map.range):
                        graph.add_memlet_path(edge.src,
                                              *path,
                                              memlet=edge.data,
                                              src_conn=edge.src_conn,
                                              dst_conn=edge.dst_conn)

            for edge in graph.in_edges(map_exit):
                graph.remove_edge(edge)
                graph.add_memlet_path(edge.src,
                                      map_exit_inner,
                                      map_exit,
                                      memlet=edge.data,
                                      src_conn=edge.src_conn,
                                      dst_conn=edge.dst_conn)
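# Hedged usage sketch: MultiExpansion is typically applied to a subgraph view
# covering several parallel maps within one state; after apply(), all outer
# maps share the common base ranges and can, for example, be fused
# afterwards. `SubgraphView` is assumed to come from dace.sdfg.graph, and
# `state` is any SDFGState containing the maps.
def _example_multi_expansion(sdfg, state):
    """ Hypothetical example: expand all maps in a state to a common base. """
    from dace.sdfg.graph import SubgraphView

    subgraph = SubgraphView(state, state.nodes())
    if MultiExpansion.can_be_applied(sdfg, subgraph):
        expansion = MultiExpansion(subgraph)
        expansion.apply(sdfg)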