class Pipeline(Map):
    """ This is a convenience subclass of Map that allows easier
        implementation of loop nests (using regular Map indices) that need a
        constant-sized initialization and drain phase (e.g., N*M + c
        iterations), which would otherwise need a flattened one-dimensional
        map. """

    init_size = SymbolicProperty(default=0,
                                 desc="Number of initialization iterations.")
    init_overlap = Property(
        dtype=bool,
        default=False,
        desc="Whether to increment regular map indices during initialization.")
    drain_size = SymbolicProperty(default=0,
                                  desc="Number of drain iterations.")
    drain_overlap = Property(
        dtype=bool,
        default=False,
        desc="Whether to increment regular map indices during pipeline drain.")

    def __init__(self,
                 *args,
                 init_size=0,
                 init_overlap=False,
                 drain_size=0,
                 drain_overlap=False,
                 **kwargs):
        super(Pipeline, self).__init__(*args, **kwargs)
        self.init_size = init_size
        self.init_overlap = init_overlap
        self.drain_size = drain_size
        self.drain_overlap = drain_overlap

    def iterator_str(self):
        return "__" + "".join(self.params)

    def loop_bound_str(self):
        from dace.codegen.targets.common import sym2cpp
        bound = 1
        for begin, end, step in self.range:
            bound *= (step + end - begin) // step
        # Add init and drain phases when relevant
        add_str = (" + " + sym2cpp(self.init_size)
                   if self.init_size != 0 and not self.init_overlap else "")
        add_str += (" + " + sym2cpp(self.drain_size)
                    if self.drain_size != 0 and not self.drain_overlap else "")
        return sym2cpp(bound) + add_str

    def init_condition(self):
        """ Variable that can be checked to see if the pipeline is currently
            in its initialization phase. """
        if self.init_size == 0:
            raise ValueError("No init condition exists for " + self.label)
        return self.iterator_str() + "_init"

    def drain_condition(self):
        """ Variable that can be checked to see if the pipeline is currently
            in its draining phase. """
        if self.drain_size == 0:
            raise ValueError("No drain condition exists for " + self.label)
        return self.iterator_str() + "_drain"

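# A minimal sketch of the flattened-bound arithmetic computed by
# loop_bound_str() above, in pure Python. For a 2-D range i=0:N, j=0:M with
# unit steps and a non-overlapping drain phase of size c, the pipeline runs
# N*M + c iterations. (Illustrative only; not part of the Pipeline API.)
def _flattened_bound_sketch(ranges, init_size=0, drain_size=0):
    bound = 1
    for begin, end, step in ranges:  # DaCe ranges are inclusive: begin:end:step
        bound *= (step + end - begin) // step
    return bound + init_size + drain_size

assert _flattened_bound_sketch([(0, 9, 1), (0, 4, 1)], drain_size=3) == 10 * 5 + 3
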
class Reduce(Node):
    """ An SDFG node that reduces an N-dimensional array to an
        (N-k)-dimensional array, with a list of axes to reduce and a
        reduction binary function. """
    from dace.codegen.instrumentation.perfsettings import PerfSettings

    # Properties
    axes = Property(dtype=tuple, allow_none=True)
    wcr = LambdaProperty()
    identity = Property(dtype=object, allow_none=True)
    schedule = Property(dtype=types.ScheduleType,
                        desc="Reduction execution policy",
                        enum=types.ScheduleType,
                        from_string=lambda x: types.ScheduleType[x])
    papi_counters = Property(dtype=list,
                             desc="List of PAPI counter preset identifiers.",
                             default=PerfSettings.perf_default_papi_counters())
    debuginfo = DebugInfoProperty()

    def __init__(self,
                 wcr,
                 axes,
                 wcr_identity=None,
                 schedule=types.ScheduleType.Default,
                 debuginfo=None):
        super(Reduce, self).__init__()
        self.wcr = wcr  # type: ast._Lambda
        self.axes = axes
        self.identity = wcr_identity
        self.schedule = schedule
        self.debuginfo = debuginfo

    def draw_node(self, sdfg, state):
        return dot.draw_node(sdfg, state, self, shape="invtriangle")

    def __str__(self):
        # Autodetect reduction type
        redtype = detect_reduction_type(self.wcr)
        if redtype == types.ReductionType.Custom:
            wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
        else:
            wcrstr = str(redtype)
            wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

        return 'Op: {op}, Axes: {axes}'.format(
            axes=('all' if self.axes is None else str(self.axes)), op=wcrstr)

    def __label__(self, sdfg, state):
        # Autodetect reduction type
        redtype = detect_reduction_type(self.wcr)
        if redtype == types.ReductionType.Custom:
            wcrstr = unparse(ast.parse(self.wcr).body[0].value.body)
        else:
            wcrstr = str(redtype)
            wcrstr = wcrstr[wcrstr.find('.') + 1:]  # Skip "ReductionType."

        return 'Op: {op}\nAxes: {axes}'.format(
            axes=('all' if self.axes is None else str(self.axes)), op=wcrstr)

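# An illustrative sketch of the label produced above, assuming that
# detect_reduction_type recognizes 'lambda a, b: a + b' as ReductionType.Sum
# (as it does in DaCe):
#   red = Reduce('lambda a, b: a + b', axes=(0,), wcr_identity=0)
#   str(red)  # -> 'Op: Sum, Axes: (0,)'
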
class LoopBack(ControlFlow):
    scope = Property(dtype=LoopScope, allow_none=True)
    edge = Property(dtype=Edge, allow_none=True)

    def __init__(self, scope, edge, *args, **kwargs):
        self.scope = scope
        self.edge = edge
        scope.back = self
        super().__init__(*args, **kwargs)

class IfExit(ControlFlow):
    scope = Property(dtype=ControlFlowScope, allow_none=True)
    edge = Property(dtype=Edge, allow_none=True)

    def __init__(self, scope, edge, *args, **kwargs):
        self.scope = scope
        self.edge = edge
        scope.exit = self
        super().__init__(*args, **kwargs)

class LoopExit(ControlFlow):
    scope = Property(dtype=LoopScope)
    edge = Property(dtype=Edge)

    def __init__(self, scope, edge, *args, **kwargs):
        self.scope = scope
        self.edge = edge
        scope.exit = self
        super().__init__(*args, **kwargs)

class IfEntry(ControlFlow):
    scope = Property(dtype=ControlFlowScope)
    edge = Property(dtype=Edge)

    def __init__(self, scope, edge, *args, **kwargs):
        self.scope = scope
        self.edge = edge
        scope.entry = self
        super().__init__(*args, **kwargs)

class IfThenElse:
    entry = Property(allow_none=True)
    exit = Property(allow_none=True)

    def __init__(self, entry, exit):
        self.entry = entry
        self.exit = exit
        self.then_scope = None
        self.else_scope = None

class AccessNode(Node):
    """ A node that accesses data in the SDFG. Denoted by a circular
        shape. """

    access = Property(choices=dtypes.AccessType,
                      desc="Type of access to this array",
                      default=dtypes.AccessType.ReadWrite)
    setzero = Property(dtype=bool, desc="Initialize to zero", default=False)
    debuginfo = DebugInfoProperty()
    data = DataProperty(desc="Data (array, stream, scalar) to access")

    def __init__(self,
                 data,
                 access=dtypes.AccessType.ReadWrite,
                 debuginfo=None):
        super(AccessNode, self).__init__()

        # Properties
        self.debuginfo = debuginfo
        self.access = access
        if not isinstance(data, str):
            raise TypeError('Data for AccessNode must be a string')
        self.data = data

    @staticmethod
    def from_json(json_obj, context=None):
        ret = AccessNode("Nodata")
        dace.serialize.set_properties_from_json(ret, json_obj,
                                                context=context)
        return ret

    def __deepcopy__(self, memo):
        node = object.__new__(AccessNode)
        node._access = self._access
        node._data = self._data
        node._setzero = self._setzero
        node._in_connectors = dcpy(self._in_connectors, memo=memo)
        node._out_connectors = dcpy(self._out_connectors, memo=memo)
        node.debuginfo = dcpy(self.debuginfo, memo=memo)
        return node

    @property
    def label(self):
        return self.data

    def __label__(self, sdfg, state):
        return self.data

    def desc(self, sdfg):
        from dace.sdfg import SDFGState, ScopeSubgraphView
        if isinstance(sdfg, (SDFGState, ScopeSubgraphView)):
            sdfg = sdfg.parent
        return sdfg.arrays[self.data]

    def validate(self, sdfg, state):
        if self.data not in sdfg.arrays:
            raise KeyError('Array "%s" not found in SDFG' % self.data)

class IfElseScope(ControlFlowScope):
    if_then_else = Property(dtype=IfThenElse, allow_none=True)
    entry = Property(dtype=IfEntry, allow_none=True)
    exit = Property(dtype=IfExit, allow_none=True)

    def __init__(self, if_then_else, *args, **kwargs):
        self.if_then_else = if_then_else
        if_then_else.else_scope = self
        self.entry = None
        self.exit = None
        super().__init__(*args, **kwargs)

class CodeLibraryNode(LibraryNode):
    """ A convenience interface for nodes to generate specific code given
        properties. """

    # Global properties
    implementations = {}
    default_implementation = None

    inputdict = Property(dtype=dict, default={})
    outputdict = Property(dtype=dict, default={})

    @property
    def has_side_effects(self) -> bool:
        # By default, assume code library nodes have side effects
        # unless stated otherwise
        return True

    def generate_code(self, inputs: Dict[str, Data],
                      outputs: Dict[str, Data]) -> str:
        """ Method that is responsible for generating the code related to
            this node.

            :param inputs: A dictionary mapping input names (on node
                           connectors) to data descriptors based on incoming
                           memlets.
            :param outputs: A dictionary mapping output names (on node
                            connectors) to data descriptors based on outgoing
                            memlets.
            :return: A string representing C++ code to be injected instead
                     of this node.
            :note: This method must be overridden by subclasses.
        """
        raise NotImplementedError('Must be overridden by subclasses')

    def __init__(self, input_names, output_names, *args, name='Custom Code',
                 **kwargs):
        # Store connector types, if given
        if isinstance(input_names, dict):
            self.inputdict = input_names
        else:
            self.inputdict = {k: None for k in set(input_names)}
        if isinstance(output_names, dict):
            self.outputdict = output_names
        else:
            self.outputdict = {k: None for k in set(output_names)}

        super().__init__(name,
                         *args,
                         inputs=set(input_names),
                         outputs=set(output_names),
                         **kwargs)

class ONNXParameter:
    """ Python representation of an ONNX parameter. """

    name = Property(dtype=str, desc="The parameter name")
    description = Property(dtype=str, desc="A description of the parameter")
    type_str = Property(dtype=str, desc="The type string of this parameter")
    param_type = Property(choices=ONNXParameterType,
                          desc="The type of this parameter",
                          default=ONNXParameterType.Single)
    homogeneous = Property(dtype=bool,
                           desc="Whether this parameter is homogeneous")

    def __repr__(self):
        return "{} ({})".format(self.name, str(self.param_type))

class MapScoringEnumerator(Enumerator):
    ''' Abstract Enumerator class that is used by enumerators which rely on
        a scoring function.
    '''

    mode = Property(desc="Data type the iterator should return. "
                    "Choice between Subgraph and List of Map Entries.",
                    default="map_entries",
                    choices=["subgraph", "map_entries"],
                    dtype=str)

    def __init__(self, sdfg, graph, subgraph, condition_function,
                 scoring_function):
        super().__init__(sdfg, graph, subgraph, condition_function)

        # Used to attach a score to each enumerated subgraph
        self._scoring_function = scoring_function

    def list(self):
        return list(e[0] for e in self.iterator())

    def scores(self):
        # Returns the (subgraph, score) tuples produced by the iterator
        return list(e for e in self.iterator())

class CodeNode(Node):
    """ A node that contains runnable code with acyclic external data
        dependencies. May either be a tasklet or a nested SDFG, and
        denoted by an octagonal shape. """

    label = Property(dtype=str, desc="Name of the CodeNode")
    location = DictProperty(
        key_type=str,
        value_type=dace.symbolic.pystr_to_symbolic,
        desc='Full storage location identifier (e.g., rank, GPU ID)')
    environments = SetProperty(
        str,
        desc="Environments required by CMake to build and run this code node.",
        default=set())

    def __init__(self, label="", location=None, inputs=None, outputs=None):
        super(CodeNode, self).__init__(inputs or set(), outputs or set())

        # Properties
        self.label = label
        self.location = location if location is not None else {}

    @property
    def free_symbols(self) -> Set[str]:
        return set().union(*(map(str,
                                 pystr_to_symbolic(v).free_symbols)
                             for v in self.location.values()))

class MyNode2(CodeLibraryNode):
    value_to_mul = Property(dtype=int,
                            default=2,
                            desc="Value to mul in custom code")

    def __init__(self, *args, **kwargs):
        # Forward extra arguments (e.g., name) to CodeLibraryNode instead of
        # silently dropping them
        super().__init__(['inp'], ['out'], *args, **kwargs)

    def generate_code(self, inputs: Dict[str, Array],
                      outputs: Dict[str, Array]) -> str:
        assert len(inputs) == 1
        assert len(outputs) == 1
        inarr = inputs['inp']
        outarr = outputs['out']
        assert len(inarr.shape) == len(outarr.shape)

        # Construct for loops
        code = ''
        for dim, shp in enumerate(inarr.shape):
            code += f'for (int i{dim} = 0; i{dim} < {shp}; ++i{dim}) {{\n'

        # Construct index expressions
        output_expr = ' + '.join(f'i{dim} * {stride}'
                                 for dim, stride in enumerate(outarr.strides))
        input_expr = ' + '.join(f'i{dim} * {stride}'
                                for dim, stride in enumerate(inarr.strides))
        code += \
            f'out[{output_expr}] = inp[{input_expr}] * {self.value_to_mul};\n'

        # End for loops
        for dim in range(len(inarr.shape)):
            code += '}\n'

        return code

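# A hedged usage sketch for MyNode2, assuming the standard DaCe graph-building
# APIs (dace.SDFG, add_array, add_read/add_write, dace.Memlet); array names
# 'A' and 'B' are illustrative:
def _mynode2_usage_sketch():
    import dace
    sdfg = dace.SDFG('code_library_example')
    sdfg.add_array('A', [20], dace.float64)
    sdfg.add_array('B', [20], dace.float64)
    state = sdfg.add_state()
    node = MyNode2()
    node.value_to_mul = 5  # override the property default
    state.add_node(node)
    state.add_edge(state.add_read('A'), None, node, 'inp',
                   dace.Memlet('A[0:20]'))
    state.add_edge(node, 'out', state.add_write('B'), None,
                   dace.Memlet('B[0:20]'))
    return sdfg
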
class CodeObject(object):
    name = Property(dtype=str, desc="Filename to use")
    code = Property(dtype=str, desc="The code attached to this object")
    language = Property(dtype=str,
                        desc="Language used for this code (same "
                        "as its file extension)")  # dtype=dtypes.Language?
    target = Property(dtype=type, desc="Target to use for compilation")
    title = Property(dtype=str, desc="Title of code for GUI")
    extra_compiler_kwargs = Property(dtype=dict,
                                     desc="Additional compiler argument "
                                     "variables to add to template")
    linkable = Property(dtype=bool,
                        desc='Should this file participate in '
                        'overall linkage?')

    def __init__(self,
                 name,
                 code,
                 language,
                 target,
                 title,
                 additional_compiler_kwargs=None,
                 linkable=True):
        super(CodeObject, self).__init__()

        self.name = name
        self.code = code
        self.language = language
        self.target = target
        self.title = title
        self.extra_compiler_kwargs = additional_compiler_kwargs or {}
        self.linkable = linkable

class ONNXTypeConstraint:
    """ Python representation of an ONNX type constraint. """

    type_str = Property(dtype=str, desc="The type parameter string")
    types = ListProperty(
        element_type=typeclass,
        desc="The possible types. Note that only tensor types are currently "
        "supported.")

    def __repr__(self):
        return self.type_str

class CodeObject(object):
    name = Property(dtype=str, desc="Filename to use")
    code = Property(dtype=str, desc="The code attached to this object")
    language = Property(dtype=str,
                        desc="Language used for this code (same "
                        "as its file extension)")
    target = Property(dtype=type,
                      desc="Target to use for compilation",
                      allow_none=True)
    target_type = Property(
        dtype=str,
        desc="Sub-target within target (e.g., host or device code)",
        default="")
    title = Property(dtype=str, desc="Title of code for GUI")
    extra_compiler_kwargs = DictProperty(key_type=str,
                                         value_type=str,
                                         desc="Additional compiler argument "
                                         "variables to add to template")
    linkable = Property(dtype=bool,
                        desc='Should this file participate in '
                        'overall linkage?')
    environments = SetProperty(
        str,
        desc="Environments required by CMake to build and run this code node.",
        default=set())

    def __init__(self,
                 name,
                 code,
                 language,
                 target,
                 title,
                 target_type="",
                 additional_compiler_kwargs=None,
                 linkable=True,
                 environments=None,
                 sdfg=None):
        super(CodeObject, self).__init__()

        self.name = name
        self.code = code
        self.language = language
        self.target = target
        self.target_type = target_type
        self.title = title
        self.extra_compiler_kwargs = additional_compiler_kwargs or {}
        self.linkable = linkable
        self.environments = environments or set()

        if language == 'cpp' and title == 'Frame' and sdfg:
            sourcemap.create_maps(sdfg, code, self.target.target_name)

    @property
    def clean_code(self):
        return re.sub(r'[ \t]*////__(DACE:|CODEGEN;)[^\n]*', '', self.code)

class ONNXAttribute:
    """ Python representation of an ONNX attribute. """

    name = Property(dtype=str, desc="The attribute name")
    description = Property(dtype=str, desc="A description of this attribute")
    required = Property(dtype=bool, desc="Whether this attribute is required")
    type = Property(choices=ONNXAttributeType,
                    desc="The type of this attribute",
                    default=ONNXAttributeType.Int)
    default_value = Property(dtype=None,
                             desc="The default value of this attribute",
                             default=None,
                             allow_none=True)

    def validate(self):
        if self.required and self.type == ONNXAttributeType.Unsupported:
            raise NotImplementedError(
                "Required attribute '{}' has an unsupported type".format(
                    self.name))

    def __repr__(self):
        return self.name

class CodeObject(object):
    name = Property(dtype=str, desc="Filename to use")
    code = Property(dtype=str, desc="The code attached to this object")
    language = Property(dtype=str,
                        desc="Language used for this code (same "
                        "as its file extension)")  # dtype=dtypes.Language?
    target = Property(dtype=type,
                      desc="Target to use for compilation",
                      allow_none=True)
    target_type = Property(
        dtype=str,
        desc="Sub-target within target (e.g., host or device code)",
        default="")
    title = Property(dtype=str, desc="Title of code for GUI")
    extra_compiler_kwargs = Property(dtype=dict,
                                     desc="Additional compiler argument "
                                     "variables to add to template")
    linkable = Property(dtype=bool,
                        desc='Should this file participate in '
                        'overall linkage?')
    environments = SetProperty(
        str,
        desc="Environments required by CMake to build and run this code node.",
        default=set())

    def __init__(self,
                 name,
                 code,
                 language,
                 target,
                 title,
                 target_type="",
                 additional_compiler_kwargs=None,
                 linkable=True,
                 environments=None):
        super(CodeObject, self).__init__()

        self.name = name
        self.code = code
        self.language = language
        self.target = target
        self.target_type = target_type
        self.title = title
        self.extra_compiler_kwargs = additional_compiler_kwargs or {}
        self.linkable = linkable
        # Default to None instead of a shared mutable default argument
        self.environments = environments or set()

class ControlFlowScope:
    nodes_in_scope = Property(
        dtype=set,
        desc="Nodes contained in this scope, "
        "including entry and exit nodes, in topological order.")

    def __init__(self, nodes_in_scope):
        self.nodes_in_scope = nodes_in_scope

    def __contains__(self, node):
        return node in self.nodes_in_scope

    def __iter__(self):
        return iter(self.nodes_in_scope)

class MyObject(object):
    float_prop = Property(dtype=float, default=0.0)

    def __init__(self, p: float):
        super().__init__()
        self.float_prop = p

    def to_json(self):
        return all_properties_to_json(self)

    @staticmethod
    def from_json(json_obj, context=None):
        ret = MyObject(0.0)
        set_properties_from_json(ret, json_obj, context=context)
        return ret

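# A minimal round-trip sketch for the serialization helpers above (assuming
# all_properties_to_json / set_properties_from_json behave as in DaCe):
def _myobject_roundtrip_sketch():
    obj = MyObject(3.5)
    restored = MyObject.from_json(obj.to_json())
    assert restored.float_prop == 3.5
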
class InLocalStorage(pattern_matching.Transformation):
    """ Implements the InLocalStorage transformation, which adds a transient
        data node between nested map entry nodes. """

    _outer_map_entry = nodes.MapEntry(nodes.Map("", [], []))
    _inner_map_entry = nodes.MapEntry(nodes.Map("", [], []))

    array = Property(
        dtype=str,
        desc="Array to create local storage for (if empty, first available)",
        default=None,
        allow_none=True)

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        return [
            nxutil.node_path_graph(InLocalStorage._outer_map_entry,
                                   InLocalStorage._inner_map_entry)
        ]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        outer_map_entry = candidate[InLocalStorage._outer_map_entry]
        inner_map_entry = candidate[InLocalStorage._inner_map_entry]

        return ' -> '.join(
            str(node) for node in [outer_map_entry, inner_map_entry])

    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        outer_map_entry = graph.nodes()[self.subgraph[
            InLocalStorage._outer_map_entry]]
        inner_map_entry = graph.nodes()[self.subgraph[
            InLocalStorage._inner_map_entry]]

        array = self.array
        if array is None:
            array = graph.edges_between(outer_map_entry,
                                        inner_map_entry)[0].data.data

        # Find the edge between the map entries that carries the array
        original_edge = None
        invariant_memlet = None
        for edge in graph.in_edges(inner_map_entry):
            src = edge.src
            if src != outer_map_entry:
                continue
            memlet = edge.data
            if array == memlet.data:
                original_edge = edge
                invariant_memlet = memlet
                break
        if invariant_memlet is None:
            # Fall back to the first edge between the two map entries
            for edge in graph.in_edges(inner_map_entry):
                src = edge.src
                if src != outer_map_entry:
                    continue
                original_edge = edge
                invariant_memlet = edge.data
                print('WARNING: Array %s not found! Using array %s instead.' %
                      (array, invariant_memlet.data))
                array = invariant_memlet.data
                break
        if invariant_memlet is None:
            raise KeyError('Array %s not found!' % array)

        # Add a transient array sized to the bounding box of the memlet
        new_data = sdfg.add_array(
            'trans_' + invariant_memlet.data, [
                symbolic.overapproximate(r)
                for r in invariant_memlet.bounding_box_size()
            ],
            sdfg.arrays[invariant_memlet.data].dtype,
            transient=True)
        data_node = nodes.AccessNode('trans_' + invariant_memlet.data)

        to_data_mm = copy.deepcopy(invariant_memlet)
        from_data_mm = copy.deepcopy(invariant_memlet)
        from_data_mm.data = data_node.data
        offset = []
        for ind, r in enumerate(invariant_memlet.subset):
            offset.append(r[0])
            if isinstance(invariant_memlet.subset[ind], tuple):
                begin = invariant_memlet.subset[ind][0] - r[0]
                end = invariant_memlet.subset[ind][1] - r[0]
                step = invariant_memlet.subset[ind][2]
                from_data_mm.subset[ind] = (begin, end, step)
            else:
                from_data_mm.subset[ind] -= r[0]
        to_data_mm.other_subset = copy.deepcopy(from_data_mm.subset)

        # Reconnect, assuming one edge to the stream
        graph.remove_edge(original_edge)
        graph.add_edge(outer_map_entry, original_edge.src_conn, data_node,
                       None, to_data_mm)
        graph.add_edge(data_node, None, inner_map_entry,
                       original_edge.dst_conn, from_data_mm)

        # Offset all memlets in the inner scope to the new transient array
        for _parent, _, _child, _, memlet in graph.bfs_edges(inner_map_entry,
                                                             reverse=False):
            if memlet.data != array:
                continue
            for ind, r in enumerate(memlet.subset):
                if isinstance(memlet.subset[ind], tuple):
                    begin = r[0] - offset[ind]
                    end = r[1] - offset[ind]
                    step = r[2]
                    memlet.subset[ind] = (begin, end, step)
                else:
                    memlet.subset[ind] -= offset[ind]
            memlet.data = 'trans_' + invariant_memlet.data

        return

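# A hedged usage sketch, assuming the SDFG driver API sdfg.apply_transformations
# and an SDFG containing the nested-map pattern; 'A' is an illustrative array:
def _apply_in_local_storage_sketch(sdfg, array_name='A'):
    return sdfg.apply_transformations(InLocalStorage,
                                      options={'array': array_name})
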
class MapReduceFusion(pm.SingleStateTransformation):
    """ Implements the map-reduce-fusion transformation.
        Fuses a map with an immediately following reduction, where the array
        between the map and the reduction is not used anywhere else.
    """
    no_init = Property(dtype=bool,
                       default=False,
                       desc='If enabled, does not create initialization '
                       'states for reduce nodes with identity')

    tasklet = pm.PatternNode(nodes.Tasklet)
    tmap_exit = pm.PatternNode(nodes.MapExit)
    in_array = pm.PatternNode(nodes.AccessNode)
    import dace.libraries.standard as stdlib  # Avoid import loop
    reduce = pm.PatternNode(stdlib.Reduce)
    out_array = pm.PatternNode(nodes.AccessNode)

    @classmethod
    def expressions(cls):
        return [
            sdutil.node_path_graph(cls.tasklet, cls.tmap_exit, cls.in_array,
                                   cls.reduce, cls.out_array)
        ]

    def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
        tmap_exit = self.tmap_exit
        in_array = self.in_array
        reduce_node = self.reduce
        tasklet = self.tasklet

        # Make sure that the array is only accessed by the map and the reduce
        if any([
                src != tmap_exit
                for src, _, _, _, memlet in graph.in_edges(in_array)
        ]):
            return False
        if any([
                dest != reduce_node
                for _, _, dest, _, memlet in graph.out_edges(in_array)
        ]):
            return False

        tmem = next(e for e in graph.edges_between(tasklet, tmap_exit)
                    if e.data.data == in_array.data).data

        # Make sure that the transient is not accessed anywhere else
        # in this state or other states
        if not permissive and (len([
                n for n in graph.nodes()
                if isinstance(n, nodes.AccessNode) and n.data == in_array.data
        ]) > 1 or in_array.data in sdfg.shared_transients()):
            return False

        # If memlet already has WCR and it is different from reduce node,
        # do not match
        if tmem.wcr is not None and tmem.wcr != reduce_node.wcr:
            return False

        # Verify that reduction ranges match tasklet map
        tout_memlet = graph.in_edges(in_array)[0].data
        rin_memlet = graph.out_edges(in_array)[0].data
        if tout_memlet.subset != rin_memlet.subset:
            return False

        return True

    def match_to_str(self, graph):
        return ' -> '.join(
            str(node) for node in [self.tasklet, self.tmap_exit, self.reduce])

    def apply(self, graph: SDFGState, sdfg: SDFG):
        tmap_exit = self.tmap_exit
        in_array = self.in_array
        reduce_node = self.reduce
        out_array = self.out_array

        # Set nodes to remove according to the expression index
        nodes_to_remove = [in_array]
        nodes_to_remove.append(reduce_node)

        memlet_edge = None
        for edge in graph.in_edges(tmap_exit):
            if edge.data.data == in_array.data:
                memlet_edge = edge
                break
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        # Find which indices should be removed from new memlet
        input_edge = graph.in_edges(reduce_node)[0]
        axes = reduce_node.axes or list(range(len(input_edge.data.subset)))
        array_edge = graph.out_edges(reduce_node)[0]

        # Delete relevant edges and nodes
        graph.remove_nodes_from(nodes_to_remove)

        # Delete relevant data descriptors
        for node in set(nodes_to_remove):
            if isinstance(node, nodes.AccessNode):
                # Try to delete it; raises ValueError if the data descriptor
                # is used somewhere else
                try:
                    sdfg.remove_data(node.data)
                except ValueError:
                    pass

        # Filter out reduced dimensions from subset
        filtered_subset = [
            dim for i, dim in enumerate(memlet_edge.data.subset)
            if i not in axes
        ]
        if len(filtered_subset) == 0:  # Output is a scalar
            filtered_subset = [(0, 0, 1)]

        # Modify edge from tasklet to map exit
        memlet_edge.data.data = out_array.data
        memlet_edge.data.wcr = reduce_node.wcr
        memlet_edge.data.subset = type(
            memlet_edge.data.subset)(filtered_subset)

        # Add edge from map exit to output array
        graph.add_edge(
            memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:],
            array_edge.dst, array_edge.dst_conn,
            Memlet.simple(array_edge.data.data,
                          array_edge.data.subset,
                          num_accesses=array_edge.data.num_accesses,
                          wcr_str=reduce_node.wcr))

        # Add initialization state as necessary
        if not self.no_init and reduce_node.identity is not None:
            init_state = sdfg.add_state_before(graph)
            init_state.add_mapped_tasklet(
                'freduce_init',
                [('o%d' % i, '%s:%s:%s' % (r[0], r[1] + 1, r[2]))
                 for i, r in enumerate(array_edge.data.subset)], {},
                '__out = %s' % reduce_node.identity, {
                    '__out':
                    Memlet.simple(
                        array_edge.data.data, ','.join([
                            'o%d' % i
                            for i in range(len(array_edge.data.subset))
                        ]))
                },
                external_edges=True)

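# A hedged usage sketch, assuming the pattern-matching driver API on SDFG
# objects (apply_transformations returns the number of applications):
def _fuse_map_reduce_sketch(sdfg):
    return sdfg.apply_transformations(MapReduceFusion)
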
class Transformation(object):
    """ Base class for transformations, as well as a static registry of
        transformations, where new transformations can be added in a
        decentralized manner.

        New transformations are registered with ``Transformation.register``
        (or ``dace.registry.autoregister_params``) with two optional boolean
        keyword arguments: ``singlestate`` (default: False) and ``strict``
        (default: False).
        If ``singlestate`` is True, the transformation operates on a single
        state; otherwise, it will be matched over an entire SDFG.
        If ``strict`` is True, this transformation will be considered strict
        (i.e., always important to perform) and will be performed
        automatically as part of SDFG strict transformations.
    """

    # Properties
    sdfg_id = Property(dtype=int, category="(Debug)")
    state_id = Property(dtype=int, category="(Debug)")
    subgraph = SubgraphProperty(dtype=dict, category="(Debug)")
    expr_index = Property(dtype=int, category="(Debug)")

    @staticmethod
    def annotates_memlets():
        """ Indicates whether the transformation annotates the edges it
            creates or modifies with the appropriate memlets. This determines
            whether to apply memlet propagation after the transformation.
        """
        return False

    @staticmethod
    def expressions():
        """ Returns a list of Graph objects that will be matched in the
            subgraph isomorphism phase. Used as a pre-pass before calling
            `can_be_applied`.
            @see Transformation.can_be_applied
        """
        raise NotImplementedError

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        """ Returns True if this transformation can be applied on the
            candidate matched subgraph.

            :param graph: SDFGState object if this Transformation is
                          single-state, or SDFG object otherwise.
            :param candidate: A mapping between node IDs returned from
                              `Transformation.expressions` and the nodes in
                              `graph`.
            :param expr_index: The list index from
                               `Transformation.expressions` that was matched.
            :param sdfg: If `graph` is an SDFGState, its parent SDFG.
                         Otherwise should be equal to `graph`.
            :param strict: Whether transformation should run in strict mode.
            :return: True if the transformation can be applied.
        """
        raise NotImplementedError

    @staticmethod
    def match_to_str(graph, candidate):
        """ Returns a string representation of the pattern match on the
            candidate subgraph. Used when identifying matches in the
            console UI. """
        raise NotImplementedError

    def __init__(self, sdfg_id, state_id, subgraph, expr_index):
        """ Initializes an instance of Transformation.

            :param sdfg_id: A unique ID of the SDFG.
            :param state_id: The node ID of the SDFG state, if applicable.
            :param subgraph: A mapping between node IDs returned from
                             `Transformation.expressions` and the nodes in
                             `graph`.
            :param expr_index: The list index from
                               `Transformation.expressions` that was matched.
            :raise TypeError: When transformation is not subclass of
                              Transformation.
            :raise TypeError: When state_id is not instance of int.
            :raise TypeError: When subgraph is not a dict of
                              dace.graph.nodes.Node : int.
        """
        self.sdfg_id = sdfg_id
        self.state_id = state_id
        for value in subgraph.values():
            if not isinstance(value, int):
                raise TypeError('All values of "subgraph" dictionary must '
                                'be instances of int.')
        self.subgraph = subgraph
        self.expr_index = expr_index

    def __lt__(self, other):
        """ Comparing two transformations by their class name and node IDs
            in match. Used for ordering transformations consistently.
        """
        if type(self) != type(other):
            return type(self).__name__ < type(other).__name__

        self_ids = iter(self.subgraph.values())
        other_ids = iter(other.subgraph.values())

        try:
            self_id = next(self_ids)
        except StopIteration:
            return True
        try:
            other_id = next(other_ids)
        except StopIteration:
            return False

        self_end = False

        while self_id is not None and other_id is not None:
            if self_id != other_id:
                return self_id < other_id
            try:
                self_id = next(self_ids)
            except StopIteration:
                self_end = True
            try:
                other_id = next(other_ids)
            except StopIteration:
                if self_end:  # Transformations are equal
                    return False
                return False
            if self_end:
                return True

    def apply_pattern(self, sdfg):
        """ Applies this transformation on the given SDFG. """
        self.apply(sdfg)
        if not self.annotates_memlets():
            labeling.propagate_labels_sdfg(sdfg)

    def __str__(self):
        return type(self).__name__

    def modifies_graph(self):
        return True

    def print_match(self, sdfg):
        """ Returns a string representation of the pattern match on the
            given SDFG. Used for printing matches in the console UI. """
        if not isinstance(sdfg, dace.SDFG):
            raise TypeError("Expected SDFG, got: {}".format(
                type(sdfg).__name__))
        if self.state_id == -1:
            graph = sdfg
        else:
            graph = sdfg.nodes()[self.state_id]
        string = type(self).__name__ + ' in '
        string += type(self).match_to_str(graph, self.subgraph)
        return string

class NestSDFG(transformation.Transformation):
    """ Implements SDFG Nesting, taking an SDFG as an input and creating a
        nested SDFG node from it. """

    promote_global_trans = Property(
        dtype=bool,
        default=False,
        desc="Promotes transients to be allocated once")

    @staticmethod
    def annotates_memlets():
        return True

    @staticmethod
    def expressions():
        # Matches anything
        return [nx.DiGraph()]

    @staticmethod
    def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
        return True

    @staticmethod
    def match_to_str(graph, candidate):
        return graph.label

    def apply(self, sdfg):
        outer_sdfg = sdfg
        nested_sdfg = dc(sdfg)

        outer_sdfg.arrays.clear()
        outer_sdfg.remove_nodes_from(outer_sdfg.nodes())

        inputs = {}
        outputs = {}
        transients = {}

        for state in nested_sdfg.nodes():
            # Input and output nodes are added as input and output nodes of
            # the nested SDFG
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(nested_sdfg).transient):
                    if (state.out_degree(node) > 0):  # input node
                        arrname = node.data
                        if arrname not in inputs:
                            arrobj = nested_sdfg.arrays[arrname]
                            nested_sdfg.arrays['__' + arrname +
                                               '_in'] = arrobj
                            outer_sdfg.arrays[arrname] = dc(arrobj)
                            inputs[arrname] = '__' + arrname + '_in'
                        node_data_name = '__' + arrname + '_in'
                    if (state.in_degree(node) > 0):  # output node
                        arrname = node.data
                        if arrname not in outputs:
                            arrobj = nested_sdfg.arrays[arrname]
                            nested_sdfg.arrays['__' + arrname +
                                               '_out'] = arrobj
                            if arrname not in inputs:
                                outer_sdfg.arrays[arrname] = dc(arrobj)
                            outputs[arrname] = '__' + arrname + '_out'
                        node_data_name = '__' + arrname + '_out'
                    node.data = node_data_name

            if self.promote_global_trans:
                scope_dict = state.scope_dict()
                for node in state.nodes():
                    if (isinstance(node, nodes.AccessNode)
                            and node.desc(nested_sdfg).transient):
                        arrname = node.data
                        if arrname not in transients and not scope_dict[node]:
                            arrobj = nested_sdfg.arrays[arrname]
                            nested_sdfg.arrays['__' + arrname +
                                               '_out'] = arrobj
                            outer_sdfg.arrays[arrname] = dc(arrobj)
                            transients[arrname] = '__' + arrname + '_out'
                        node.data = '__' + arrname + '_out'

        for arrname in inputs.keys():
            nested_sdfg.arrays.pop(arrname)
        for arrname in outputs.keys():
            nested_sdfg.arrays.pop(arrname, None)
        for oldarrname, newarrname in transients.items():
            nested_sdfg.arrays.pop(oldarrname)
            nested_sdfg.arrays[newarrname].transient = False
        outputs.update(transients)

        # Update memlets
        for state in nested_sdfg.nodes():
            for _, edge in enumerate(state.edges()):
                _, _, _, _, mem = edge
                src = state.memlet_path(edge)[0].src
                dst = state.memlet_path(edge)[-1].dst
                if isinstance(src, nodes.AccessNode):
                    if (mem.data in inputs.keys()
                            and src.data == inputs[mem.data]):
                        mem.data = inputs[mem.data]
                    elif (mem.data in outputs.keys()
                          and src.data == outputs[mem.data]):
                        mem.data = outputs[mem.data]
                elif (isinstance(dst, nodes.AccessNode)
                      and mem.data in outputs.keys()
                      and dst.data == outputs[mem.data]):
                    mem.data = outputs[mem.data]

        outer_state = outer_sdfg.add_state(outer_sdfg.label)

        nested_node = outer_state.add_nested_sdfg(nested_sdfg, outer_sdfg,
                                                  set(inputs.values()),
                                                  set(outputs.values()))
        for key, val in inputs.items():
            arrnode = outer_state.add_read(key)
            outer_state.add_edge(
                arrnode, None, nested_node, val,
                memlet.Memlet.from_array(key, arrnode.desc(outer_sdfg)))
        for key, val in outputs.items():
            arrnode = outer_state.add_write(key)
            outer_state.add_edge(
                nested_node, val, arrnode, None,
                memlet.Memlet.from_array(key, arrnode.desc(outer_sdfg)))

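# A hedged usage sketch, assuming the apply_transformations driver API:
def _nest_sdfg_sketch(sdfg):
    return sdfg.apply_transformations(
        NestSDFG, options={'promote_global_trans': True})
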
class Gemm(dace.sdfg.nodes.LibraryNode):
    """ Executes alpha * (A @ B) + beta * C. C should be unidirectionally
        broadcastable (ONNX terminology) to A @ B. """

    # Global properties
    implementations = {
        "pure": ExpandGemmPure,
        "MKL": ExpandGemmMKL,
        "cuBLAS": ExpandGemmCuBLAS
    }
    default_implementation = None

    # Object fields
    dtype = dace.properties.TypeClassProperty(allow_none=True)
    transA = Property(dtype=bool,
                      desc="Whether to transpose A before multiplying")
    transB = Property(dtype=bool,
                      desc="Whether to transpose B before multiplying")
    alpha = Property(
        dtype=tuple(dace.dtypes._CONSTANT_TYPES),
        default=1,
        desc="A scalar which will be multiplied with A @ B before adding C")
    beta = Property(
        dtype=tuple(dace.dtypes._CONSTANT_TYPES),
        default=0,
        desc="A scalar which will be multiplied with C before adding it to "
        "alpha * (A @ B)")

    def __init__(self,
                 name,
                 dtype=None,
                 location=None,
                 transA=False,
                 transB=False,
                 alpha=1,
                 beta=0):
        super().__init__(name,
                         location=location,
                         inputs={"_a", "_b"},
                         outputs={"_c"})
        self.dtype = dtype
        self.transA = transA
        self.transB = transB
        self.alpha = alpha
        self.beta = beta

    def validate(self, sdfg, state):
        in_edges = state.in_edges(self)
        if len(in_edges) not in [2, 3]:
            raise ValueError("Expected 2 or 3 inputs to gemm")
        size0 = None
        size1 = None
        for _, _, _, dst_conn, memlet in state.in_edges(self):
            if dst_conn == '_a':
                subset = dc(memlet.subset)
                subset.squeeze()
                size0 = subset.size()
            if dst_conn == '_b':
                subset = dc(memlet.subset)
                subset.squeeze()
                size1 = subset.size()
        if self.transA:
            size0 = list(reversed(size0))
        if self.transB:
            size1 = list(reversed(size1))
        out_edges = state.out_edges(self)
        if len(out_edges) != 1:
            raise ValueError(
                "Expected exactly one output from matrix-matrix product")
        out_memlet = out_edges[0].data
        # Function is symmetric, edge order does not matter
        if len(size0) != 2 or len(size1) != 2:
            raise ValueError(
                "matrix-matrix product only supported on matrices")
        if size0[1] != size1[0]:
            raise ValueError("Inputs to matrix-matrix product "
                             "must agree in the k-dimension")
        out_subset = dc(out_memlet.subset)
        out_subset.squeeze()
        size3 = out_subset.size()
        if len(size3) != 2:
            raise ValueError(
                "matrix-matrix product only supported on matrices")
        if len(size3) == 2 and list(size3) != [size0[-2], size1[-1]]:
            raise ValueError(
                "Output to matrix-matrix product must agree in the m and n "
                "dimensions")

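# An illustrative, pure-Python sketch of the shape rules checked by
# Gemm.validate (concrete sizes M, K, N are made up for the example):
# for A of shape (M, K) and B of shape (K, N), C = alpha * (A @ B) + beta * C
# requires the k-dimensions to agree and the output to have shape (M, N).
_M, _K, _N = 32, 16, 8
_size0, _size1, _size3 = [_M, _K], [_K, _N], [_M, _N]
assert _size0[1] == _size1[0]                   # k-dimension agreement
assert list(_size3) == [_size0[-2], _size1[-1]]  # output is (M, N)
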
class Tasklet(CodeNode):
    """ A node that contains a tasklet: a functional computation procedure
        that can only access external data specified using connectors.

        Tasklets may be implemented in Python, C++, or any supported
        language by the code generator.
    """
    label = Property(dtype=str, desc="Name of the tasklet")
    language = Property(enum=types.Language, default=types.Language.Python)
    code = CodeProperty(desc="Tasklet code")
    code_global = CodeProperty(
        desc="Global scope code needed for tasklet execution", default="")
    code_init = CodeProperty(
        desc="Extra code that is called on DaCe runtime initialization",
        default="")
    code_exit = CodeProperty(
        desc="Extra code that is called on DaCe runtime cleanup", default="")
    location = Property(dtype=str,
                        desc="Tasklet execution location descriptor")
    debuginfo = DebugInfoProperty()

    def __init__(self,
                 label,
                 inputs=set(),
                 outputs=set(),
                 code="",
                 language=types.Language.Python,
                 code_global="",
                 code_init="",
                 code_exit="",
                 location="-1",
                 debuginfo=None):
        super(Tasklet, self).__init__(inputs, outputs)

        # Properties
        self.label = label
        self.language = language
        self.code = code
        self.location = location
        self.code_global = code_global
        self.code_init = code_init
        self.code_exit = code_exit
        self.debuginfo = debuginfo

    @property
    def name(self):
        return self._label

    def draw_node(self, sdfg, graph):
        return dot.draw_node(sdfg, graph, self, shape="octagon")

    def validate(self, sdfg, state):
        if not data.validate_name(self.label):
            raise NameError('Invalid tasklet name "%s"' % self.label)
        for in_conn in self.in_connectors:
            if not data.validate_name(in_conn):
                raise NameError('Invalid input connector "%s"' % in_conn)
        for out_conn in self.out_connectors:
            if not data.validate_name(out_conn):
                raise NameError('Invalid output connector "%s"' % out_conn)

    def __str__(self):
        if not self.label:
            return "--Empty--"
        else:
            return self.label

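# A minimal construction sketch; connector names 'a'/'b' and the code string
# are illustrative:
def _tasklet_sketch():
    t = Tasklet('add_one', inputs={'a'}, outputs={'b'}, code='b = a + 1')
    assert str(t) == 'add_one'
    return t
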
class NestedSDFG(CodeNode):
    """ An SDFG state node that contains an SDFG of its own, runnable using
        the data dependencies specified using its connectors.

        It is encouraged to use nested SDFGs instead of coarse-grained
        tasklets since they are analyzable with respect to transformations.

        @note: A nested SDFG cannot create recursion (one of its parent
               SDFGs).
    """
    label = Property(dtype=str, desc="Name of the SDFG")
    # NOTE: We cannot use SDFG as the type because of an import loop
    sdfg = Property(dtype=graph.OrderedDiGraph, desc="The SDFG")
    schedule = Property(dtype=types.ScheduleType,
                        desc="SDFG schedule",
                        enum=types.ScheduleType,
                        from_string=lambda x: types.ScheduleType[x])
    location = Property(dtype=str, desc="SDFG execution location descriptor")
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    def __init__(self,
                 label,
                 sdfg,
                 inputs: Set[str],
                 outputs: Set[str],
                 schedule=types.ScheduleType.Default,
                 location="-1",
                 debuginfo=None):
        super(NestedSDFG, self).__init__(inputs, outputs)

        # Properties
        self.label = label
        self.sdfg = sdfg
        self.schedule = schedule
        self.location = location
        self.debuginfo = debuginfo

    def draw_node(self, sdfg, graph):
        return dot.draw_node(sdfg, graph, self, shape="doubleoctagon")

    def __str__(self):
        if not self.label:
            return "SDFG"
        else:
            return self.label

    def validate(self, sdfg, state):
        if not data.validate_name(self.label):
            raise NameError('Invalid nested SDFG name "%s"' % self.label)
        for in_conn in self.in_connectors:
            if not data.validate_name(in_conn):
                raise NameError('Invalid input connector "%s"' % in_conn)
        for out_conn in self.out_connectors:
            if not data.validate_name(out_conn):
                raise NameError('Invalid output connector "%s"' % out_conn)

        # Recursively validate nested SDFG
        self.sdfg.validate()

class Consume(object):
    """ Consume is a scope, like `Map`, that is a part of the parametric
        graph extension of the SDFG. It creates a producer-consumer
        relationship between the input stream and the scope subgraph. The
        subgraph is scheduled to a given number of processing elements
        for processing, and they will try to pop elements from the input
        stream until a given quiescence condition is reached. """

    # Properties
    label = Property(dtype=str, desc="Name of the consume node")
    pe_index = Property(dtype=str, desc="Processing element identifier")
    num_pes = SymbolicProperty(desc="Number of processing elements")
    condition = CodeProperty(desc="Quiescence condition", allow_none=True)
    language = Property(enum=types.Language, default=types.Language.Python)
    schedule = Property(dtype=types.ScheduleType,
                        desc="Consume schedule",
                        enum=types.ScheduleType,
                        from_string=lambda x: types.ScheduleType[x])
    chunksize = Property(dtype=int,
                         desc="Maximal size of elements to consume at a time",
                         default=1)
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    def as_map(self):
        """ Compatibility function that allows to view the consume as a map,
            mainly in memlet propagation. """
        return Map(self.label, [self.pe_index],
                   sbs.Range([(0, self.num_pes - 1, 1)]), self.schedule)

    def __init__(self,
                 label,
                 pe_tuple,
                 condition,
                 schedule=types.ScheduleType.Default,
                 chunksize=1,
                 debuginfo=None):
        super(Consume, self).__init__()

        # Properties
        self.label = label
        self.pe_index, self.num_pes = pe_tuple
        self.condition = condition
        self.schedule = schedule
        self.chunksize = chunksize
        self.debuginfo = debuginfo

    def __str__(self):
        if self.condition is not None:
            return ("%s [%s=0:%s], Condition: %s" %
                    (self._label, self.pe_index, self.num_pes,
                     CodeProperty.to_string(self.condition)))
        else:
            return ("%s [%s=0:%s]" %
                    (self._label, self.pe_index, self.num_pes))

    def validate(self, sdfg, state, node):
        if not data.validate_name(self.label):
            raise NameError('Invalid consume name "%s"' % self.label)

    def get_param_num(self):
        """ Returns the number of consume dimension parameters/symbols. """
        return 1

class Map(object):
    """ A Map is a two-node representation of parametric graphs, containing
        an integer set by which the contents (nodes dominated by an entry
        node and post-dominated by an exit node) are replicated.

        Maps contain a `schedule` property, which specifies how the scope
        should be scheduled (execution order). Code generators can use the
        schedule property to generate appropriate code, e.g., GPU kernels.
    """
    from dace.codegen.instrumentation.perfsettings import PerfSettings

    # List of (editable) properties
    label = Property(dtype=str, desc="Label of the map")
    params = ParamsProperty(desc="Mapped parameters")
    range = RangeProperty(desc="Ranges of map parameters")
    # order = OrderProperty(desc="Order of map dimensions", unmapped=True)
    schedule = Property(dtype=types.ScheduleType,
                        desc="Map schedule",
                        enum=types.ScheduleType,
                        from_string=lambda x: types.ScheduleType[x])
    is_async = Property(dtype=bool, desc="Map asynchronous evaluation")
    unroll = Property(dtype=bool, desc="Map unrolling")
    flatten = Property(dtype=bool, desc="Map loop flattening")
    fence_instrumentation = Property(
        dtype=bool, desc="Disable instrumentation in all subnodes")
    papi_counters = Property(dtype=list,
                             desc="List of PAPI counter preset identifiers.",
                             default=PerfSettings.perf_default_papi_counters())
    debuginfo = DebugInfoProperty()
    is_collapsed = Property(dtype=bool,
                            desc="Show this node/scope/state as collapsed",
                            default=False)

    # We cannot have multiple consecutive PAPI starts/stops inside the same
    # thread; the following variable is used to recognize the map that
    # started the counters.
    _has_papi_counters = False
    # We must have supersections synchronized.
    _can_be_supersection_start = True

    def __init__(self,
                 label,
                 params,
                 ndrange,
                 schedule=types.ScheduleType.Default,
                 unroll=False,
                 is_async=False,
                 flatten=False,
                 fence_instrumentation=False,
                 debuginfo=None):
        super(Map, self).__init__()

        # Assign properties
        self.label = label
        self.schedule = schedule
        self.unroll = unroll
        self.is_async = is_async
        self.flatten = flatten
        self.params = params
        self.range = ndrange
        self.debuginfo = debuginfo
        self._fence_instrumentation = fence_instrumentation

    def __str__(self):
        return self.label + "[" + ", ".join([
            "{}={}".format(i, r)
            for i, r in zip(self._params,
                            [sbs.Range.dim_to_string(d) for d in self._range])
        ]) + "]"

    def validate(self, sdfg, state, node):
        if not data.validate_name(self.label):
            raise NameError('Invalid map name "%s"' % self.label)

    def get_param_num(self):
        """ Returns the number of map dimension parameters/symbols. """
        return len(self.params)

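# A hedged construction sketch; dace.subsets.Range and the symbol N are
# assumptions, and DaCe ranges are inclusive (0:N-1:1 iterates N times):
def _map_sketch():
    import dace
    from dace import subsets as sbs
    N = dace.symbol('N')
    m = Map('rowmap', ['i'], sbs.Range([(0, N - 1, 1)]))
    return str(m)  # renders roughly as "rowmap[i=0:N]" via dim_to_string
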