def _define_local_scalar(sdfg: SDFG,
                         state: SDFGState,
                         dtype: dace.typeclass,
                         storage: dtypes.StorageType = dtypes.StorageType.Default):
    """ Defines a local (transient) scalar in a DaCe program.

        :param dtype: Datatype of the scalar.
        :param storage: Storage type of the scalar.
        :return: Name of the new scalar descriptor.
    """
    name = sdfg.temp_data_name()
    sdfg.add_scalar(name, dtype, transient=True, storage=storage)
    return name
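

# Usage sketch (illustrative, not part of the public API): other replacement
# helpers call `_define_local_scalar` to obtain a temporary transient scalar
# while building a program.
def _example_define_local_scalar():
    sdfg = SDFG('local_scalar_example')
    state = sdfg.add_state()
    # Returns the auto-generated name of the new transient scalar.
    return _define_local_scalar(sdfg, state, dace.int32)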
def _cart_create(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, dims: ShapeType):
    """ Creates a process-grid and adds it to the DaCe program. The process-grid is implemented with
        [MPI_Cart_create](https://www.mpich.org/static/docs/latest/www3/MPI_Cart_create.html).

        :param dims: Shape of the process-grid (see `dims` parameter of `MPI_Cart_create`), e.g., [2, 3, 3].
        :return: Name of the new process-grid descriptor.
    """
    pgrid_name = sdfg.add_pgrid(dims)

    # Dummy tasklet adds MPI variables to the program's state.
    from dace.libraries.mpi import Dummy
    tasklet = Dummy(pgrid_name, [
        f'MPI_Comm {pgrid_name}_comm;',
        f'MPI_Group {pgrid_name}_group;',
        f'int {pgrid_name}_coords[{len(dims)}];',
        f'int {pgrid_name}_dims[{len(dims)}];',
        f'int {pgrid_name}_rank;',
        f'int {pgrid_name}_size;',
        f'bool {pgrid_name}_valid;',
    ])
    state.add_node(tasklet)

    # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations.
    _, scal = sdfg.add_scalar(pgrid_name, dace.int32, transient=True)
    wnode = state.add_write(pgrid_name)
    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(pgrid_name, scal))

    return pgrid_name
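

# Usage sketch (assumption: this helper backs the `dace.comm.Cart_create`
# replacement in DaCe's distributed frontend; `pv` is unused by the helper,
# so `None` suffices in this illustrative example).
def _example_cart_create():
    sdfg = SDFG('pgrid_example')
    state = sdfg.add_state()
    # Declare a 2x3 process-grid; code generation emits MPI_Cart_create.
    return _cart_create(None, sdfg, state, [2, 3])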
def _subarray(pv: 'ProgramVisitor',
              sdfg: SDFG,
              state: SDFGState,
              array: Union[str, ShapeType],
              subarray: Union[str, ShapeType],
              dtype: dtypes.typeclass = None,
              process_grid: str = None,
              correspondence: Sequence[Integral] = None):
    """ Adds a sub-array descriptor to the DaCe program. When `process_grid` is set, sub-arrays are implemented with
        [MPI_Type_create_subarray](https://www.mpich.org/static/docs/v3.2/www3/MPI_Type_create_subarray.html).

        :param array: Either the name of an Array descriptor or the shape of the array (similar to the
                      `array_of_sizes` parameter of `MPI_Type_create_subarray`).
        :param subarray: Either the name of an Array descriptor or the sub-shape of the (sub-)array (similar to the
                         `array_of_subsizes` parameter of `MPI_Type_create_subarray`).
        :param dtype: Datatype of the array/sub-array (similar to the `oldtype` parameter of
                      `MPI_Type_create_subarray`).
        :param process_grid: Name of the process-grid for collective scatter/gather operations.
        :param correspondence: Matching of the array/sub-array's dimensions to the process-grid's dimensions.
        :return: Name of the new sub-array descriptor.
    """
    # Get dtype, shape, and subshape
    if isinstance(array, str):
        shape = sdfg.arrays[array].shape
        arr_dtype = sdfg.arrays[array].dtype
    else:
        shape = array
        arr_dtype = None
    if isinstance(subarray, str):
        subshape = sdfg.arrays[subarray].shape
        sub_dtype = sdfg.arrays[subarray].dtype
    else:
        subshape = subarray
        sub_dtype = None
    dtype = dtype or arr_dtype or sub_dtype

    subarray_name = sdfg.add_subarray(dtype, shape, subshape, process_grid, correspondence)

    # Generate a subgraph only if a process-grid is set, i.e., the sub-array will be used for collective
    # scatter/gather operations.
    if process_grid:
        # Dummy tasklet adds MPI variables to the program's state.
        from dace.libraries.mpi import Dummy
        tasklet = Dummy(subarray_name, [
            f'MPI_Datatype {subarray_name};',
            f'int* {subarray_name}_counts;',
            f'int* {subarray_name}_displs;',
        ])
        state.add_node(tasklet)

        # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations.
        _, scal = sdfg.add_scalar(subarray_name, dace.int32, transient=True)
        wnode = state.add_write(subarray_name)
        state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(subarray_name, scal))

    return subarray_name
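

# Usage sketch (illustrative): a 16x16 global array distributed as 8x8 blocks
# over a 2x2 process-grid. `correspondence=[0, 1]` matches array dimension i
# to process-grid dimension i.
def _example_subarray():
    sdfg = SDFG('subarray_example')
    state = sdfg.add_state()
    sdfg.add_array('A', [16, 16], dace.float64)   # global sizes
    sdfg.add_array('lA', [8, 8], dace.float64)    # local (per-rank) subsizes
    pgrid = _cart_create(None, sdfg, state, [2, 2])
    return _subarray(None, sdfg, state, 'A', 'lA', process_grid=pgrid, correspondence=[0, 1])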
def _cart_sub(pv: 'ProgramVisitor',
              sdfg: SDFG,
              state: SDFGState,
              parent_grid: str,
              color: Sequence[Union[Integral, bool]],
              exact_grid: RankType = None):
    """ Partitions the `parent_grid` into lower-dimensional sub-grids and adds them to the DaCe program.
        The sub-grids are implemented with
        [MPI_Cart_sub](https://www.mpich.org/static/docs/latest/www3/MPI_Cart_sub.html).

        :param parent_grid: Parent process-grid (similar to the `comm` parameter of `MPI_Cart_sub`).
        :param color: The i-th entry specifies whether the i-th dimension is kept in the sub-grid or is dropped (see
                      `remain_dims` input of `MPI_Cart_sub`).
        :param exact_grid: [DEVELOPER] If set, then out of all the sub-grids created, only the one that contains the
                           rank with id `exact_grid` will be utilized for collective communication.
        :return: Name of the new sub-grid descriptor.
    """
    pgrid_name = sdfg.add_pgrid(parent_grid=parent_grid, color=color, exact_grid=exact_grid)

    # Count the sub-grid dimensions.
    pgrid_ndims = sum([bool(c) for c in color])

    # Dummy tasklet adds MPI variables to the program's state.
    from dace.libraries.mpi import Dummy
    tasklet = Dummy(pgrid_name, [
        f'MPI_Comm {pgrid_name}_comm;',
        f'MPI_Group {pgrid_name}_group;',
        f'int {pgrid_name}_coords[{pgrid_ndims}];',
        f'int {pgrid_name}_dims[{pgrid_ndims}];',
        f'int {pgrid_name}_rank;',
        f'int {pgrid_name}_size;',
        f'bool {pgrid_name}_valid;',
    ])
    state.add_node(tasklet)

    # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations.
    _, scal = sdfg.add_scalar(pgrid_name, dace.int32, transient=True)
    wnode = state.add_write(pgrid_name)
    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(pgrid_name, scal))

    return pgrid_name
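

# Usage sketch (illustrative): split a 2x3 parent grid into its three 2-rank
# column sub-grids by keeping only the first dimension.
def _example_cart_sub():
    sdfg = SDFG('subgrid_example')
    state = sdfg.add_state()
    parent = _cart_create(None, sdfg, state, [2, 3])
    # color[i] == True keeps dimension i (cf. `remain_dims` of MPI_Cart_sub).
    return _cart_sub(None, sdfg, state, parent, [True, False])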
class ONNXModel:
    """Loads an ONNX model into an SDFG."""
    def __init__(self, name, model: onnx.ModelProto, cuda=False):
        """
        Constructs a new ONNXModel importer.

        :param name: the name for the SDFG.
        :param model: the model to import.
        :param cuda: if `True`, weights will be passed as cuda arrays.
        """
        graph: onnx.GraphProto = model.graph

        self.sdfg = SDFG(name)
        self.cuda = cuda
        self.state = self.sdfg.add_state()

        # Add all values to the SDFG, check for unsupported ops
        ##########################################

        self.value_infos = {}
        self.inputs = []
        self.outputs = []

        for value, is_input in chain(zip(graph.input, repeat(True)),
                                     zip(graph.output, repeat(False))):
            if not value.HasField("name"):
                raise ValueError("Got input or output without name")
            if is_input:
                self.inputs.append(value.name)
            else:
                self.outputs.append(value.name)

            self.value_infos[value.name] = value
            self._add_value_info(value)

        for value in graph.value_info:
            if not value.HasField("name"):
                raise ValueError("Got value_info without name")
            if value.name not in self.value_infos:
                self.value_infos[value.name] = value

        # add weights
        self.weights = {}
        for init in graph.initializer:
            self._add_constant_tensor(init)

        access_nodes = {}
        self._idx_to_node = []
        for i, node in enumerate(graph.node):
            if not has_onnx_node(node.op_type):
                raise ValueError("Unsupported ONNX operator: '{}'".format(node.op_type))

            # extract the op attributes
            op_attributes = {
                attribute_proto.name: convert_attribute_proto(attribute_proto)
                for attribute_proto in node.attribute
            }

            if node.HasField("name"):
                node_name = clean_onnx_name(node.name)
            else:
                node_name = node.op_type + "_" + str(i)

            # construct the dace node
            op_node = get_onnx_node(node.op_type)(node_name, **op_attributes)
            self.state.add_node(op_node)
            self._idx_to_node.append(op_node)

            for param_idx, (name, is_input) in chain(
                    enumerate(zip(node.input, repeat(True))),
                    enumerate(zip(node.output, repeat(False)))):
                if clean_onnx_name(name) not in self.sdfg.arrays:
                    if name not in self.value_infos:
                        raise ValueError("Could not find array with name '{}'".format(name))
                    self._add_value_info(self.value_infos[name])

                # get the access node
                if name in access_nodes:
                    access = access_nodes[name]
                    self._update_access_type(access, is_input)
                else:
                    access = nd.AccessNode(
                        clean_onnx_name(name),
                        AccessType.ReadOnly if is_input else AccessType.WriteOnly)
                    self.state.add_node(access)
                    access_nodes[name] = access

                # get the connector name
                params = op_node.schema.inputs if is_input else op_node.schema.outputs
                params_len = len(params)
                if param_idx >= params_len:
                    # This is a variadic parameter; the last parameter of the schema must then be variadic.
                    if params[-1].param_type != ONNXParameterType.Variadic:
                        raise ValueError(
                            "Expected the last {i_or_o} parameter to be variadic,"
                            " since the {i_or_o} with idx {param_idx} has more parameters than the schema ({params_len})"
                            .format(i_or_o="input" if is_input else "output",
                                    param_idx=param_idx,
                                    params_len=params_len))
                    conn_name = params[-1].name + "__" + str(param_idx - params_len + 1)
                elif params[param_idx].param_type == ONNXParameterType.Variadic:
                    # This is a variadic parameter, and it is within the range of params, so it must be the first
                    # instance of a variadic parameter.
                    conn_name = params[param_idx].name + "__0"
                else:
                    conn_name = params[param_idx].name

                data_desc = self.sdfg.arrays[clean_onnx_name(name)]

                # add the connector if required, and add an edge
                if is_input:
                    if conn_name not in op_node.in_connectors:
                        op_node.add_in_connector(conn_name)
                    self.state.add_edge(
                        access, None, op_node, conn_name,
                        dace.Memlet.from_array(clean_onnx_name(name), data_desc))
                else:
                    if conn_name not in op_node.out_connectors:
                        op_node.add_out_connector(conn_name)
                    self.state.add_edge(
                        op_node, conn_name, access, None,
                        dace.Memlet.from_array(clean_onnx_name(name), data_desc))

        if self.cuda:
            self.sdfg.apply_strict_transformations()
            self.sdfg.apply_gpu_transformations()
            self.sdfg.apply_strict_transformations()

            # set all gpu transients to be persistent
            for _, _, arr in self.sdfg.arrays_recursive():
                if arr.transient and arr.storage == StorageType.GPU_Global:
                    arr.lifetime = AllocationLifetime.Persistent

    @staticmethod
    def _update_access_type(node: dace.nodes.AccessNode, is_input: bool):
        if node.access == AccessType.ReadOnly and not is_input:
            node.access = AccessType.ReadWrite
        elif node.access == AccessType.WriteOnly and is_input:
            node.access = AccessType.ReadWrite

    def _add_constant_tensor(self, tensor: onnx.TensorProto):
        if not tensor.HasField("name"):
            raise ValueError("Got tensor without name")

        if not tensor.HasField("data_type"):
            raise ValueError("Initializer tensor '{}' has no type".format(tensor.name))

        name = clean_onnx_name(tensor.name)
        dtype = onnx_tensor_type_to_typeclass(tensor.data_type)

        if len(tensor.dims) == 0:
            # this is a scalar
            self.sdfg.add_scalar(name, dtype)
        else:
            dims = [d for d in tensor.dims]
            if name not in self.sdfg.arrays:
                self.sdfg.add_array(name, dims, dtype)
            else:
                existing_arr = self.sdfg.arrays[name]
                if existing_arr.dtype != dtype:
                    raise ValueError(
                        "Invalid ONNX model; found two values with name '{}', but different dtypes ({} and {})"
                        .format(name, existing_arr.dtype, dtype))
                if tuple(existing_arr.shape) != tuple(dims):
                    raise ValueError(
                        "Invalid ONNX model; found two values with name '{}', but different dimensions ({} and {})"
                        .format(name, existing_arr.shape, dims))

        self.weights[tensor.name] = numpy_helper.to_array(tensor)

    def _add_value_info(self, value_info: onnx.ValueInfoProto):
        if not value_info.HasField("name"):
            raise ValueError("Got value without name")

        name = value_info.name

        if not _nested_HasField(value_info, "type.tensor_type.shape"):
            raise ValueError("Value '{}' does not have a shape in this graph."
                             " Please run shape inference before importing.".format(name))

        tensor_type = value_info.type.tensor_type

        if not tensor_type.HasField("elem_type"):
            raise ValueError("Value '{}' does not have a type in this graph."
                             " Please run type inference before importing.".format(name))

        shape = []
        for d in tensor_type.shape.dim:
            if d.HasField("dim_value"):
                shape.append(d.dim_value)
            elif d.HasField("dim_param"):
                parsed = pystr_to_symbolic(d.dim_param)
                for sym in parsed.free_symbols:
                    if clean_onnx_name(str(sym)) not in self.sdfg.symbols:
                        self.sdfg.add_symbol(clean_onnx_name(str(sym)), stype=int)
                    parsed = parsed.subs(sym, dace.symbol(clean_onnx_name(str(sym))))
                shape.append(parsed)
            else:
                raise ValueError("Value '{}' does not have a shape in this graph."
                                 " Please run shape inference before importing.".format(name))

        transient = name not in self.inputs and name not in self.outputs
        if len(shape) == 0:
            self.sdfg.add_scalar(clean_onnx_name(name),
                                 dtype=onnx_tensor_type_to_typeclass(tensor_type.elem_type),
                                 transient=transient)
        else:
            self.sdfg.add_array(clean_onnx_name(name),
                                shape=shape,
                                dtype=onnx_tensor_type_to_typeclass(tensor_type.elem_type),
                                transient=transient)

    def __call__(self, *args, **inputs):
        sdfg = deepcopy(self.sdfg)

        # convert the positional args to kwargs
        if len(args) > len(self.inputs):
            raise ValueError("Expected {} arguments, got {}".format(len(self.inputs), len(args)))

        inputs.update(dict(zip(self.inputs, args)))

        # check that there are no missing inputs
        if len(set(self.inputs).difference(inputs)) != 0:
            raise ValueError("Missing inputs {}".format(", ".join(set(self.inputs).difference(inputs))))

        # check that there are no unknown inputs
        # NOTE: symbols can only be passed as kwargs
        if len(set(inputs).difference(self.inputs).difference(sdfg.free_symbols)) != 0:
            raise ValueError("Unknown inputs {}".format(", ".join(set(inputs).difference(self.inputs))))

        clean_inputs = {}
        for input, arr in inputs.items():
            if input in sdfg.free_symbols:
                clean_inputs[input] = arr
            else:
                clean_inputs[clean_onnx_name(input)] = arr

        # add the weights
        params = {}
        for name, arr in self.weights.items():
            if len(arr.shape) == 0:
                params[clean_onnx_name(name)] = arr[()]
            else:
                if self.cuda:
                    clean_name = clean_onnx_name(name)
                    sdfg.arrays[clean_name].storage = StorageType.GPU_Global
                    params[clean_name] = numba.cuda.to_device(arr)
                else:
                    params[clean_onnx_name(name)] = arr.copy()

        inferred_symbols = infer_symbols_from_shapes(sdfg, {**clean_inputs, **params})
        # TODO @orausch if this is removed the SDFG complains
        # TypeError: Type mismatch for argument ONNX_unk__493: expected scalar type, got <class 'sympy.core.numbers.Integer'>
        # fix this better
        inferred_symbols = {k: int(v) for k, v in inferred_symbols.items()}

        def eval_dim(dim):
            for sym in dim.free_symbols:
                dim = dim.subs(sym, inferred_symbols[sym.name])
            return dim

        outputs = OrderedDict()
        # create numpy arrays for the outputs
        for output in self.outputs:
            clean_name = clean_onnx_name(output)
            arr = sdfg.arrays[clean_name]

            # TODO @orausch add error handling for evalf
            shape = [eval_dim(d) if type(d) is dace.symbol else d for d in arr.shape]
            outputs[clean_name] = np.empty(shape, dtype=arr.dtype.as_numpy_dtype())

        sdfg.expand_library_nodes()
        # sdfg.apply_strict_transformations()

        sdfg(**clean_inputs, **params, **outputs, **inferred_symbols)

        if len(outputs) == 1:
            return next(iter(outputs.values()))

        return tuple(outputs.values())
def _redistribute(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, in_buffer: str, in_subarray: str,
                  out_buffer: str, out_subarray: str):
    """ Redistributes an Array using process-grids, sub-arrays, and the Redistribute library node.

        :param in_buffer: Name of the (local) input Array descriptor.
        :param in_subarray: Input sub-array descriptor.
        :param out_buffer: Name of the (local) output Array descriptor.
        :param out_subarray: Output sub-array descriptor.
        :return: Name of the new redistribution descriptor.
    """
    rdistrarray_name = sdfg.add_rdistrarray(in_subarray, out_subarray)

    from dace.libraries.mpi import Dummy, Redistribute
    tasklet = Dummy(rdistrarray_name, [
        f'MPI_Datatype {rdistrarray_name};',
        f'int {rdistrarray_name}_sends;',
        f'MPI_Datatype* {rdistrarray_name}_send_types;',
        f'int* {rdistrarray_name}_dst_ranks;',
        f'int {rdistrarray_name}_recvs;',
        f'MPI_Datatype* {rdistrarray_name}_recv_types;',
        f'int* {rdistrarray_name}_src_ranks;',
        f'int {rdistrarray_name}_self_copies;',
        f'int* {rdistrarray_name}_self_src;',
        f'int* {rdistrarray_name}_self_dst;',
        f'int* {rdistrarray_name}_self_size;',
    ])
    state.add_node(tasklet)
    _, scal = sdfg.add_scalar(rdistrarray_name, dace.int32, transient=True)
    wnode = state.add_write(rdistrarray_name)
    state.add_edge(tasklet, '__out', wnode, None, Memlet.from_array(rdistrarray_name, scal))

    libnode = Redistribute('_Redistribute_', rdistrarray_name)

    inbuf_range = None
    if isinstance(in_buffer, tuple):
        inbuf_name, inbuf_range = in_buffer
    else:
        inbuf_name = in_buffer
    in_desc = sdfg.arrays[inbuf_name]
    inbuf_node = state.add_read(inbuf_name)

    outbuf_range = None
    if isinstance(out_buffer, tuple):
        outbuf_name, outbuf_range = out_buffer
    else:
        outbuf_name = out_buffer
    out_desc = sdfg.arrays[outbuf_name]
    outbuf_node = state.add_write(outbuf_name)

    if inbuf_range:
        inbuf_mem = Memlet.simple(inbuf_name, inbuf_range)
    else:
        inbuf_mem = Memlet.from_array(inbuf_name, in_desc)
    if outbuf_range:
        outbuf_mem = Memlet.simple(outbuf_name, outbuf_range)
    else:
        outbuf_mem = Memlet.from_array(outbuf_name, out_desc)

    state.add_edge(inbuf_node, None, libnode, '_inp_buffer', inbuf_mem)
    state.add_edge(libnode, '_out_buffer', outbuf_node, None, outbuf_mem)

    return rdistrarray_name
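

# Usage sketch (illustrative): repartition a 16x16 array from 8x16 row-blocks
# on a 2x1 grid to 16x8 column-blocks on a 1x2 grid. All names are examples.
def _example_redistribute():
    sdfg = SDFG('redistribute_example')
    state = sdfg.add_state()
    sdfg.add_array('lA_in', [8, 16], dace.float64, transient=True)   # local input blocks
    sdfg.add_array('lA_out', [16, 8], dace.float64, transient=True)  # local output blocks
    rows = _cart_create(None, sdfg, state, [2, 1])
    cols = _cart_create(None, sdfg, state, [1, 2])
    sub_in = _subarray(None, sdfg, state, [16, 16], 'lA_in', process_grid=rows, correspondence=[0, 1])
    sub_out = _subarray(None, sdfg, state, [16, 16], 'lA_out', process_grid=cols, correspondence=[0, 1])
    return _redistribute(None, sdfg, state, 'lA_in', sub_in, 'lA_out', sub_out)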
if __name__ == '__main__':
    print('SDFG consecutive tasklet test')

    # Externals (parameters, symbols)
    N = dp.symbol('N')
    N.set(20)
    input = dp.ndarray([N], dp.int32)
    output = dp.ndarray([N], dp.int32)
    input[:] = dp.int32(5)
    output[:] = dp.int32(0)

    # Construct SDFG
    mysdfg = SDFG('ctasklet')
    state = mysdfg.add_state()
    A_ = state.add_array('A', [N], dp.int32)
    B_ = state.add_array('B', [N], dp.int32)
    mysdfg.add_scalar('something', dp.int32, transient=True)
    map_entry, map_exit = state.add_map('mymap', dict(i='0:N'))
    tasklet = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = 5*a')
    state.add_edge(map_entry, None, tasklet, 'a', Memlet.simple(A_, 'i'))
    tasklet2 = state.add_tasklet('mytasklet2', {'c'}, {'d'}, 'd = 2*c')
    state.add_edge(tasklet, 'b', tasklet2, 'c', Memlet.simple('something', '0'))
    state.add_edge(tasklet2, 'd', map_exit, None, Memlet.simple(B_, 'i'))

    # Add outer edges
    state.add_edge(A_, None, map_entry, None, Memlet.simple(A_, '0:N'))
    state.add_edge(map_exit, None, B_, None, Memlet.simple(B_, '0:N'))

    # Left for debugging purposes
    mysdfg.draw_to_file()
if __name__ == '__main__':
    print('SDFG consecutive tasklet (nested) test')

    # Externals (parameters, symbols)
    N = dp.symbol('N')
    N.set(20)
    input = dp.ndarray([N], dp.int32)
    output = dp.ndarray([N], dp.int32)
    input[:] = dp.int32(5)
    output[:] = dp.int32(0)

    # Construct SDFG
    mysdfg = SDFG('ctasklet')
    state = mysdfg.add_state()
    A_ = state.add_array('A', [N], dp.int32)
    B_ = state.add_array('B', [N], dp.int32)
    mysdfg.add_scalar('something', dp.int32, transient=True)
    omap_entry, omap_exit = state.add_map('omap', dict(k='0:2'))
    map_entry, map_exit = state.add_map('mymap', dict(i='0:N/2'))
    tasklet = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = 5*a')
    state.add_edge(map_entry, None, tasklet, 'a', Memlet.simple(A_, 'k*N/2+i'))
    tasklet2 = state.add_tasklet('mytasklet2', {'c'}, {'d'}, 'd = 2*c')
    state.add_edge(tasklet, 'b', tasklet2, 'c', Memlet.simple('something', '0'))
    state.add_edge(tasklet2, 'd', map_exit, None, Memlet.simple(B_, 'k*N/2+i'))

    # Add outer edges
    state.add_edge(A_, None, omap_entry, None, Memlet.simple(A_, '0:N'))
    state.add_edge(omap_entry, None, map_entry, None, Memlet.simple(A_, 'k*N/2:(k+1)*N/2'))
    state.add_edge(map_exit, None, omap_exit, None,