def presynchronize_streams(sdfg, dfg, state_id, node, callsite_stream):
    """
    Write stream-synchronization calls for the producers of ``node``.

    Nothing is written when ``node`` itself has a ``_cuda_stream`` attribute
    or when ``is_devicelevel_gpu`` reports true for it. Otherwise, one
    ``<backend>StreamSynchronize(...)`` call is written for every incoming
    edge whose source node has a ``_cuda_stream`` attribute.

    :param sdfg: SDFG that contains the state.
    :param dfg: Unused by this function.
    :param state_id: Index of the state within ``sdfg.nodes()``.
    :param node: Node whose incoming edges are inspected.
    :param callsite_stream: Code stream the calls are written to.
    """
    state = sdfg.nodes()[state_id]
    skip = (hasattr(node, "_cuda_stream")
            or is_devicelevel_gpu(sdfg, state, node))
    if skip:
        return

    backend = Config.get('compiler', 'cuda', 'backend')
    for edge in state.in_edges(node):
        producer = edge.src
        if not hasattr(producer, "_cuda_stream"):
            continue
        stream_expr = ("__state->gpu_context->streams[%d]" %
                       producer._cuda_stream)
        sync_call = "%sStreamSynchronize(%s);" % (backend, stream_expr)
        callsite_stream.write(sync_call, sdfg, state_id,
                              [producer, edge.dst])
def _run_liveoutput(command, **kwargs):
    """
    Run ``command`` as a subprocess and collect its combined stdout/stderr.

    Output is read line by line while the process runs; when the
    ``debugprint`` configuration entry is set, each line is also printed
    as it arrives.

    :param command: Command passed to ``subprocess.Popen``.
    :param kwargs: Extra keyword arguments forwarded to ``subprocess.Popen``.
    :raises subprocess.CalledProcessError: If the process exits with a
                                           nonzero return code; the captured
                                           output is attached.
    """
    process = subprocess.Popen(command,
                               stderr=subprocess.STDOUT,
                               stdout=subprocess.PIPE,
                               **kwargs)
    output = six.StringIO()
    debugprint = Config.get_bool('debugprint')

    while True:
        raw_line = process.stdout.readline()
        # readline() yields an empty value only at EOF; a blank output line
        # still contains its newline, so it must not end the loop.
        if not raw_line:
            break
        line = raw_line.rstrip().decode('utf-8')
        output.write(line + '\n')
        if debugprint:
            print(line, flush=True)

    # Wait for termination and pick up anything that was not read above
    stdout, stderr = process.communicate()
    if debugprint:
        if stdout:
            print(stdout.decode('utf-8'), flush=True)
        if stderr:
            print(stderr.decode('utf-8'), flush=True)
    if stdout:
        output.write(stdout.decode('utf-8'))
    if stderr:
        output.write(stderr.decode('utf-8'))

    # An error occurred, raise exception
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command,
                                            output.getvalue())
def set_settings(settings_array, client_id):
    """
    Apply a set of configuration values and save them for one client.

    The client's existing configuration file is loaded if present (otherwise
    the default configuration is loaded), every entry of ``settings_array``
    is applied, and the result is written to
    ``./client_configs/<client_id>.conf``.

    :param settings_array: Mapping from slash-separated configuration paths
                           (e.g. ``"a/b/c"``) to the values to set.
    :param client_id: Identifier used to build the client's file name.
    """
    from dace.config import Config

    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs("./client_configs/", exist_ok=True)

    # NOTE(review): client_id is concatenated into the path unchanged; if it
    # can come from an untrusted client it should be validated by the caller.
    clientpath = "./client_configs/" + client_id + ".conf"

    if os.path.isfile(clientpath):
        Config.load(clientpath)
    else:
        Config.load()

    for setting_path, val in settings_array.items():
        Config.set(*setting_path.split("/"), value=val)

    Config.save(clientpath)
def test_nccl_reduce():
    """
    Run ``nccl_reduce`` with its ``gpu`` map scheduled as GPU_Multidevice
    and check the smallest unique output value against ``sum(range(ng))``.
    """
    num_gpus = Config.get('compiler', 'cuda', 'max_number_gpus')
    size = 15

    sdfg: dace.SDFG = nccl_reduce.to_sdfg(strict=True)
    multi_gpu_map = find_map_by_param(sdfg, 'gpu')
    multi_gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize({'root_device': 0, 'num_gpus': num_gpus})

    # Zero-initialized output buffer
    out = np.ndarray(shape=size, dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=size)

    expected = sum(range(num_gpus))
    assert np.unique(out)[0] == expected
def render_config_dialog(self):
    """
    Build and show the configuration window.

    Top-level entries whose metadata type is ``'dict'`` get their own
    notebook page, rendered through ``render_config_subtree``; every other
    top-level entry is rendered into the "General" page through
    ``render_config_element``.
    """
    # Load metadata for configuration
    Config.load_schema()

    self.window = Gtk.Window()
    pages = Gtk.Notebook()
    pages.set_scrollable(True)
    self.window.add(pages)

    # General (top-level) settings
    general_label = Gtk.Label()
    general_label.set_label('General')
    general_grid = Gtk.Grid()
    general_grid.set_hexpand(True)
    pages.append_page(general_grid, general_label)

    has_columns = False
    for row, (cname, cval) in enumerate(sorted(Config.get().items())):
        meta = Config.get_metadata(cname)

        if meta['type'] != 'dict':
            # The two columns are inserted lazily, before the first element
            if not has_columns:
                general_grid.insert_column(0)
                general_grid.insert_column(1)
                has_columns = True
            self.render_config_element(cval, (cname, ), general_grid, row,
                                       meta)
            continue

        # Nested category: give it a dedicated page
        page_label = Gtk.Label()
        page_label.set_label(meta['title'])
        page_grid = Gtk.Grid()
        page_grid.set_hexpand(True)
        pages.append_page(page_grid, page_label)
        self.render_config_subtree(cval, (cname, ), page_grid)

    self.window.show_all()
    self.window.connect("delete-event", self.win_close_callback, None)
def on_sdfg_begin(self, sdfg, local_stream, global_stream, codegen):
    """
    Emit PAPI setup code at the beginning of an SDFG.

    Code is only written when ``sdfg`` has no parent,
    ``PAPIUtils.is_papi_used`` is true for it, and ``self._papi_used`` is
    set after ``configure_papi()`` has run.

    :param sdfg: The SDFG whose code generation begins.
    :param local_stream: Stream receiving the initialization code.
    :param global_stream: Stream receiving the include directive.
    :param codegen: Unused by this method.
    """
    if sdfg.parent is not None or not PAPIUtils.is_papi_used(sdfg):
        return

    # Configure CMake project and counters
    self.configure_papi()
    if not self._papi_used:
        return

    # Add instrumentation includes and initialize PAPI
    global_stream.write('#include <dace/perf/papi.h>', sdfg)
    counter_list = ', '.join(self._counters)
    init_code = ('''dace::perf::PAPI::init(); dace::perf::PAPIValueStore<%s> __perf_store (__state->report);''' % counter_list)
    local_stream.write(init_code, sdfg)

    # Get the measured overhead and take the minimum to compensate
    compensate = Config.get_bool('instrumentation', 'papi',
                                 'overhead_compensation')
    if compensate:
        local_stream.write("__perf_store.getMeasuredOverhead();", sdfg)
def test_nccl_send_recv():
    """
    Run ``nccl_send_recv`` on exactly two GPUs and check that the unique
    output values are ``[0, 1]``.

    :raises ValueError: If fewer than two GPUs are configured.
    """
    available_gpus = Config.get('compiler', 'cuda', 'max_number_gpus')
    if available_gpus < 2:
        raise ValueError('This test needs to run with at least 2 GPUs.')
    # The program is always specialized for two GPUs
    ng = 2

    sdfg: dace.SDFG = nccl_send_recv.to_sdfg(strict=True)
    multi_gpu_map = find_map_by_param(sdfg, 'gpu_id')
    multi_gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize({'num_gpus': ng})

    out = sdfg()
    res = np.array([0, 1])

    assert np.allclose(np.unique(out), res), f'\nout: {out}\nres: {res}\n'
def testDefaultDataTypes(self):
    """
    Check that the ``compiler.default_data_types`` configuration entry is
    enforced by type inference.

    With ``"python"`` the integer and float literals must infer to 64-bit
    types, with ``"c"`` to 32-bit types. ``value3`` must always be inferred
    as ``uint64``.
    """
    # check that configuration about default data types is enforced
    config_data_types = Config.get('compiler', 'default_data_types').lower()

    # One assignment per line, so that the snippet is valid Python source
    code_str = "value1 = 10\nvalue2=3.14\nvalue3=5000000000"
    inf_symbols = type_inference.infer_types(code_str)

    if config_data_types == "python":
        self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int64))
        self.assertEqual(inf_symbols["value2"],
                         dtypes.typeclass(np.float64))
    elif config_data_types == "c":
        self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int32))
        self.assertEqual(inf_symbols["value2"],
                         dtypes.typeclass(np.float32))
    else:
        # Do not let an unexpected setting silently skip the checks above
        self.fail("Unknown configuration for default_data_types: {}".format(
            config_data_types))

    # in any case, value3 needs uint64
    self.assertEqual(inf_symbols["value3"], dtypes.typeclass(np.uint64))
def test_nccl_allreduce():
    """
    Run ``nccl_allreduce`` with the first node of its start state scheduled
    as GPU_Multidevice and check the smallest unique output value against
    ``sum(range(ng))``.
    """
    num_gpus = Config.get('compiler', 'cuda', 'max_number_gpus')
    size = 15

    sdfg: dace.SDFG = nccl_allreduce.to_sdfg(strict=True)
    # The map is taken positionally: first node of the start state
    multi_gpu_map = sdfg.start_state.nodes()[0]
    multi_gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize({'num_gpus': num_gpus})

    # Zero-initialized output buffer
    out = np.ndarray(shape=size, dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=size)

    expected = sum(range(num_gpus))
    assert np.unique(out)[0] == expected
def bounding_box_union(subset_a: Subset, subset_b: Subset) -> Range:
    """
    Perform union by creating a bounding-box of two subsets.

    When the ``optimizer.symbolic_positive`` configuration entry is set and
    two bounds cannot be compared (``min``/``max`` raise ``TypeError``), the
    bound without free symbols is taken as the minimum and the other one as
    the maximum. If both bounds contain free symbols, the error propagates.

    :param subset_a: First subset.
    :param subset_b: Second subset.
    :return: A range with unit step covering both subsets.
    :raises ValueError: If the subsets differ in dimensionality.
    """
    if subset_a.dims() != subset_b.dims():
        raise ValueError('Dimension mismatch between %s and %s' %
                         (str(subset_a), str(subset_b)))

    # Check whether all expressions containing a symbolic value should
    # always be evaluated to positive. If so, union will yield
    # a different result respectively.
    assume_positive = Config.get('optimizer', 'symbolic_positive')

    bounds = zip(subset_a.min_element(), subset_b.min_element(),
                 subset_a.max_element(), subset_b.max_element())

    if not assume_positive:
        return Range([(min(lo_a, lo_b), max(hi_a, hi_b), 1)
                      for lo_a, lo_b, hi_a, hi_b in bounds])

    boxes = []
    for lo_a, lo_b, hi_a, hi_b in bounds:
        try:
            lower = min(lo_a, lo_b)
        except TypeError:
            # Incomparable: fall back to the symbol-free bound as minimum
            if not lo_a.free_symbols:
                lower = lo_a
            elif not lo_b.free_symbols:
                lower = lo_b
            else:
                raise

        try:
            upper = max(hi_a, hi_b)
        except TypeError:
            # Incomparable: the bound paired with a symbol-free one is the
            # maximum
            if not hi_a.free_symbols:
                upper = hi_b
            elif not hi_b.free_symbols:
                upper = hi_a
            else:
                raise

        boxes.append((lower, upper, 1))

    return Range(boxes)
def timethis(program, title, flop_count, f, *args, **kwargs):
    """ Runs a function multiple (`DACE_treps`) times, logs the running times
        to a file, and prints the median time (with FLOPs if given).
        @param program: The title of the measurement.
        @param title: A sub-title of the measurement.
        @param flop_count: Number of floating point operations in `program`.
                           If greater than zero, produces a median FLOPS
                           report.
        @param f: The function to measure.
        @param args: Arguments to invoke the function with.
        @param kwargs: Keyword arguments to invoke the function with.
        @return: Latest return value of the function.
    """

    start = timer()
    REPS = int(Config.get('treps'))
    times = [start] * (REPS + 1)
    ret = None
    for i in range(REPS):
        # Call function
        ret = f(*args, **kwargs)
        times[i + 1] = timer()

    # Duration of each repetition: difference of consecutive timestamps
    diffs = np.array([(times[i] - times[i - 1]) for i in range(1, REPS + 1)])

    problem_size = sys.argv[1] if len(sys.argv) >= 2 else 0

    # Append to the log so the header and the results of earlier runs are
    # kept; the header is only written when the file does not exist yet.
    write_header = not os.path.isfile('results.log')
    with open('results.log', 'a') as logfile:
        if write_header:
            logfile.write('Program\tOptimization\tProblem_Size\tRuntime_sec\n')
        for d in diffs:
            logfile.write('%s\t%s\t%s\t%.8f\n' %
                          (program, title, problem_size, d))

    time_secs = np.median(diffs)
    if flop_count > 0:
        GFLOPs = (flop_count / time_secs) * 1e-9
        print(title, GFLOPs, 'GFLOP/s       (', time_secs * 1000, 'ms)')
    else:
        print(title, time_secs * 1000, 'ms')

    return ret
def __call__(self, **kwargs):
    """
    Invoke the compiled function with the given keyword arguments.

    The initializer is run first if the object has not been initialized
    yet. When the ``profiling`` configuration entry is set, the call goes
    through ``operations.timethis``.

    :param kwargs: Arguments passed to ``_construct_args``.
    :return: The value returned by the compiled function. With profiling
             this is whatever ``operations.timethis`` returns, so both modes
             yield a result instead of ``None`` under profiling.
    :raises: Re-raises any of the caught exception types after unloading
             the library.
    """
    try:
        argtuple = self._construct_args(**kwargs)

        # Call initializer function if necessary, then SDFG
        if self._initialized is False:
            self.initialize(*argtuple)

        # PROFILING
        if Config.get_bool('profiling'):
            return operations.timethis(self._sdfg.name, 'DaCe', 0,
                                       self._cfunc, *argtuple)
        return self._cfunc(*argtuple)

    except (RuntimeError, TypeError, UnboundLocalError, KeyError,
            DuplicateDLLError, ReferenceError):
        self._lib.unload()
        raise
def _try_to_match_transformation(graph: Union[SDFG, SDFGState], collapsed_graph: nx.DiGraph, subgraph: Dict[int, int],
                                 sdfg: SDFG, xform: Union[xf.PatternTransformation, Type[xf.PatternTransformation]],
                                 expr_idx: int, nxpattern: nx.DiGraph, state_id: int, permissive: bool,
                                 options: Dict[str, Any]) -> Optional[xf.PatternTransformation]:
    """
    Helper function that tries to instantiate a pattern match into a
    transformation object.

    ``xform`` may be a transformation instance (used as is) or a
    transformation type (constructed with ``options``; if the constructor
    rejects them with ``TypeError``, it is constructed without arguments and
    the options are set as attributes).

    :return: The transformation if ``can_be_applied`` returned a true
             value; ``None`` if it did not, or if an exception occurred
             while ``optimizer.match_exception`` is not set.
    """
    # Translate the match into {pattern node: node ID in ``graph``}
    node_mapping = {
        nxpattern.nodes[pattern_id]['node']: graph.node_id(collapsed_graph.nodes[graph_id]['node'])
        for graph_id, pattern_id in subgraph.items()
    }

    try:
        if isinstance(xform, xf.PatternTransformation):
            match = xform
        else:
            # Construct directly from type with options
            ctor_options = options or {}
            try:
                match = xform(**ctor_options)
            except TypeError:
                # Backwards compatibility, transformation does not support
                # ctor arguments; set the options manually instead
                match = xform()
                for attr_name, attr_value in ctor_options.items():
                    setattr(match, attr_name, attr_value)

        match.setup_match(sdfg, sdfg.sdfg_id, state_id, node_mapping, expr_idx, options=options)
        match_found = match.can_be_applied(graph, expr_idx, sdfg, permissive=permissive)
    except Exception as ex:
        if Config.get_bool('optimizer', 'match_exception'):
            raise
        xform_type = xform if isinstance(xform, type) else type(xform)
        print('WARNING: {p}::can_be_applied triggered a {c} exception:'
              ' {e}'.format(p=xform_type.__name__, c=ex.__class__.__name__, e=ex))
        return None

    return match if match_found else None
def apply(self, sdfg):
    """
    Remove the second array of the matched pair and re-point every memlet
    that read from it to the first array.

    Assumed pattern: ``A -- e1 --> B -- e2 --> others``, where ``A`` is the
    matched input array and ``B`` the matched output array.

    :param sdfg: SDFG that contains the matched state.
    """
    graph = sdfg.nodes()[self.state_id]
    in_array = graph.nodes()[self.subgraph[RedundantSecondArray._in_array]]
    out_array = graph.nodes()[self.subgraph[
        RedundantSecondArray._out_array]]

    # 1. Get edge e1 and extract subsets for arrays A and B
    connecting_edge = graph.edges_between(in_array, out_array)[0]
    a_subset, b1_subset = _validate_subsets(connecting_edge, sdfg.arrays)

    # 2. Iterate over the e2 edges and traverse the memlet tree
    for out_edge in graph.out_edges(out_array):
        for tree_edge in graph.memlet_tree(out_edge):
            # 2-a. Extract subsets for array B and others
            b3_subset, other_subset = _validate_subsets(
                tree_edge, sdfg.arrays, src_name=out_array.data)

            # 2-b. Modify memlet to match array A. Example:
            # A -- (0, a:b)/(c:c+b) --> B -- (c+d)/None --> others
            # A -- (0, a+d)/None --> others
            tree_edge.data.data = in_array.data
            # (c+d) - (c:c+b) = (d)
            b3_subset.offset(b1_subset, negative=True)
            # (0, a:b)(d) = (0, a+d) (or offset for indices)
            if isinstance(a_subset, subsets.Indices):
                shifted = copy.deepcopy(a_subset)
                shifted.offset(b3_subset, negative=False)
                tree_edge.data.subset = shifted
            else:
                tree_edge.data.subset = a_subset.compose(b3_subset)
            tree_edge.data.other_subset = other_subset

        # 2-c. Remove edge and add new one
        graph.remove_edge(out_edge)
        graph.add_edge(in_array, out_edge.src_conn, out_edge.dst,
                       out_edge.dst_conn, out_edge.data)

    # Finally, remove out_array node
    graph.remove_node(out_array)
    # TODO: Should the array be removed from the SDFG?
    # del sdfg.arrays[out_array]
    if Config.get_bool("debugprint"):
        RedundantSecondArray._arrays_removed += 1
def cmake_options():
    """
    Build the list of CMake ``-D`` options from the ``compiler.xilinx``
    configuration entries and ``compiler.autobuild_bitstreams``.

    :return: List of option strings. ``-DVITIS_ROOT_DIR`` is appended only
             when ``compiler.xilinx.path`` is set to a true value.
    """
    host_flags = Config.get("compiler", "xilinx", "host_flags")
    synthesis_flags = Config.get("compiler", "xilinx", "synthesis_flags")
    build_flags = Config.get("compiler", "xilinx", "build_flags")
    mode = Config.get("compiler", "xilinx", "mode")
    target_platform = Config.get("compiler", "xilinx", "platform")

    if Config.get_bool("compiler", "xilinx", "enable_debugging"):
        enable_debugging = "ON"
    else:
        enable_debugging = "OFF"

    if Config.get_bool("compiler", "autobuild_bitstreams"):
        autobuild = "ON"
    else:
        autobuild = "OFF"

    options = []
    options.append("-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags))
    options.append(
        "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags))
    options.append("-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags))
    options.append("-DDACE_XILINX_MODE={}".format(mode))
    options.append(
        "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform))
    options.append(
        "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging))
    options.append("-DDACE_FPGA_AUTOBUILD_BITSTREAM={}".format(autobuild))

    # Override Vitis/SDx/SDAccel installation directory
    if Config.get("compiler", "xilinx", "path"):
        # Backslashes are replaced by forward slashes in the emitted path
        root_dir = Config.get("compiler", "xilinx", "path").replace(
            "\\", "/")
        options.append("-DVITIS_ROOT_DIR=\"{}\"".format(root_dir))

    return options
def test_nccl_reduce_symbolic():
    """
    Run ``nccl_reduce_symbolic`` with its ``gpu`` map scheduled as
    GPU_Multidevice (and the ``root_gpu`` map, if found, as Sequential) and
    check that the unique output values are ``ng * i`` for each ``i`` in
    ``range(ng)``.
    """
    num_gpus = Config.get('compiler', 'cuda', 'max_number_gpus')
    size = 2

    sdfg: dace.SDFG = nccl_reduce_symbolic.to_sdfg(strict=True)

    root_map = find_map_by_param(sdfg, 'root_gpu')
    if root_map:
        root_map.schedule = dtypes.ScheduleType.Sequential

    multi_gpu_map = find_map_by_param(sdfg, 'gpu')
    multi_gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice

    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize({'num_gpus': num_gpus})

    # Zero-initialized (ng x n) output buffer
    out = np.ndarray(shape=[num_gpus, size], dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=size)

    expected = np.array([num_gpus * i for i in range(num_gpus)])
    assert (np.unique(out) == expected).all()
def __init__(self, wrapped_type):
    """
    Wrap a type and record its C name and size.

    A string is resolved as an attribute of ``numpy``. The Python builtins
    ``int``, ``float`` and ``complex`` are mapped to a numpy type chosen by
    the ``compiler.default_data_types`` configuration entry (``python``
    selects the 64-bit / 128-bit variants, ``c`` the 32-bit / 64-bit ones).

    :param wrapped_type: Type to wrap, or the name of a numpy type.
    :raises ValueError: If a string does not name a numpy attribute.
    :raises NameError: If a builtin is given and the configured default is
                       neither ``python`` nor ``c``.
    """
    # Convert python basic types
    if isinstance(wrapped_type, str):
        try:
            wrapped_type = getattr(numpy, wrapped_type)
        except AttributeError:
            raise ValueError("Unknown type: {}".format(wrapped_type))

    config_data_types = Config.get('compiler', 'default_data_types')

    # (builtin, default under 'python', default under 'c')
    builtin_defaults = (
        (int, numpy.int64, numpy.int32),
        (float, numpy.float64, numpy.float32),
        (complex, numpy.complex128, numpy.complex64),
    )
    for builtin, python_default, c_default in builtin_defaults:
        # Identity comparison on purpose: only the exact builtin is mapped
        if wrapped_type is not builtin:
            continue
        if config_data_types.lower() == 'python':
            wrapped_type = python_default
        elif config_data_types.lower() == 'c':
            wrapped_type = c_default
        else:
            raise NameError(
                "Unknown configuration for default_data_types: {}".format(
                    config_data_types))
        break

    self.type = wrapped_type  # Type in Python
    self.ctype = _CTYPES[wrapped_type]  # Type in C
    self.ctype_unaligned = self.ctype  # Type in C (without alignment)
    self.dtype = self  # For compatibility support with numpy
    self.bytes = _BYTES[wrapped_type]  # Number of bytes for this type
def apply(self, sdfg):
    """
    Merge duplicate access nodes reached from the matched map entry.

    Every access node that is a direct successor of the map entry, is not
    the matched output node, and refers to the same data as it, is removed;
    its outgoing edges are re-created from the matched output node.

    :param sdfg: SDFG that contains the matched state.
    """
    graph = sdfg.nodes()[self.state_id]
    map_entry = graph.nodes()[self.subgraph[
        RedundantArrayCopying3._map_entry]]
    out_array = graph.nodes()[self.subgraph[
        RedundantArrayCopying3._out_array]]

    for entry_edge in graph.out_edges(map_entry):
        duplicate = entry_edge.dst
        is_duplicate = (isinstance(duplicate, nodes.AccessNode)
                        and duplicate != out_array
                        and duplicate.data == out_array.data)
        if not is_duplicate:
            continue

        # Move the duplicate's outgoing edges over to out_array
        for moved_edge in graph.out_edges(duplicate):
            graph.add_edge(out_array, None, moved_edge.dst,
                           moved_edge.dst_conn, moved_edge.data)
            graph.remove_edge(moved_edge)

        graph.remove_edge(entry_edge)
        graph.remove_node(duplicate)

        if Config.get_bool("debugprint"):
            RedundantArrayCopying3._arrays_removed += 1
def __call__(self, *args, **kwargs):
    """
    Convenience function that parses, compiles, and runs a DaCe program.

    :param args: Positional arguments; also used for parsing.
    :param kwargs: Named arguments of the program.
    :return: Whatever the compiled program returns.
    """
    # Parse SDFG
    sdfg = parse_from_function(self, *args)

    # Add named arguments to the call (positional values are paired with
    # self.argnames in order)
    kwargs.update(zip(self.argnames, args))

    # Update arguments with symbols in data shapes
    inferred_symbols = infer_symbols_from_shapes(sdfg, kwargs)
    kwargs.update(inferred_symbols)

    # Allow CLI to prompt for optimizations
    if Config.get_bool('optimizer', 'transform_on_call'):
        sdfg = sdfg.optimize()

    # Compile SDFG (note: this is done after symbol inference due to shape
    # altering transformations such as Vectorization)
    compiled = sdfg.compile()

    return compiled(**kwargs)
def apply(self, sdfg):
    """
    Remove the second array of the matched pair and redirect its outgoing
    edges to the first array.

    The subset of the connecting memlet that refers to the first array is
    applied to every memlet on the outgoing memlet trees: as an offset when
    it is a ``subsets.Indices``, by composition otherwise.

    :param sdfg: SDFG that contains the matched state.
    """
    graph = sdfg.nodes()[self.state_id]
    in_array = graph.nodes()[self.subgraph[RedundantSecondArray._in_array]]
    out_array = graph.nodes()[self.subgraph[
        RedundantSecondArray._out_array]]

    # Pick the side of the connecting memlet that addresses in_array
    connecting = graph.edges_between(in_array, out_array)[0].data
    subset = (connecting.subset
              if connecting.data == in_array.data else
              connecting.other_subset)
    offset_only = isinstance(subset, subsets.Indices)

    for out_edge in graph.out_edges(out_array):
        # Modify all outgoing edges to point to in_array
        for tree_edge in graph.memlet_tree(out_edge):
            if tree_edge.data.data == out_array.data:
                tree_edge.data.data = in_array.data
                if offset_only:
                    tree_edge.data.subset.offset(subset, False)
                else:
                    tree_edge.data.subset = subset.compose(
                        tree_edge.data.subset)
            elif tree_edge.data.other_subset:
                if offset_only:
                    tree_edge.data.other_subset.offset(subset, False)
                else:
                    tree_edge.data.other_subset = subset.compose(
                        tree_edge.data.other_subset)

        # Redirect edge so that it originates from in_array
        graph.remove_edge(out_edge)
        graph.add_edge(in_array, out_edge.src_conn, out_edge.dst,
                       out_edge.dst_conn, out_edge.data)

    # Finally, remove out_array node
    graph.remove_node(out_array)
    # TODO: Should the array be removed from the SDFG?
    # del sdfg.arrays[out_array]
    if Config.get_bool("debugprint"):
        RedundantSecondArray._arrays_removed += 1
def __call__(self, *args, **kwargs):
    """
    Invoke the compiled function.

    Positional arguments are mapped to names through ``self.argnames`` when
    it is set. The library is loaded and the initializer run on first use.
    With the ``profiling`` configuration entry set, the call goes through
    ``operations.timethis``.

    :return: ``self._return_arrays``.
    :raises: Re-raises any of the caught exception types after unloading
             the library.
    """
    # Update arguments from ordered list
    if args and self.argnames is not None:
        kwargs.update(zip(self.argnames, args))

    try:
        argtuple, initargtuple = self._construct_args(kwargs)

        # Call initializer function if necessary, then SDFG
        if self._initialized is False:
            self._lib.load()
            self.initialize(*initargtuple)

        # PROFILING
        profiling = Config.get_bool('profiling')
        if not profiling:
            self._cfunc(self._libhandle, *argtuple)
        else:
            operations.timethis(self._sdfg, 'DaCe', 0, self._cfunc,
                                self._libhandle, *argtuple)

        return self._return_arrays

    except (RuntimeError, TypeError, UnboundLocalError, KeyError,
            cgx.DuplicateDLLError, ReferenceError):
        self._lib.unload()
        raise
def parse_from_function(function, *compilation_args, strict=None):
    """ Try to parse a DaceProgram object and return the `dace.SDFG` object
        that corresponds to it.
        @param function: DaceProgram object (obtained from the
                         `@dace.program` decorator).
        @param compilation_args: Various compilation arguments e.g. dtypes.
        @param strict: Whether to apply strict transformations or not (None
                       uses configuration-defined value).
        @return: The generated SDFG object.
        @raise TypeError: If `function` is not a DaceProgram.
    """
    if not isinstance(function, DaceProgram):
        raise TypeError(
            'Function must be of type dace.frontend.python.DaceProgram')

    # Obtain DaCe program as SDFG
    sdfg = function.generate_pdp(*compilation_args)

    # No need at this point
    # Fill in scope entry/exit connectors
    #sdfg.fill_scope_connectors()
    # Memlet propagation
    #if sdfg.propagate:
    #    labeling.propagate_labels_sdfg(sdfg)
    ########################

    # Apply strict transformations automatically; the configuration is only
    # consulted when the caller passed no explicit choice
    if strict is None:
        apply_strict = Config.get_bool('optimizer',
                                       'automatic_strict_transformations')
    else:
        apply_strict = strict == True
    if apply_strict:
        sdfg.apply_strict_transformations()

    # Drawing the SDFG (again) to a .dot file
    sdfg.draw_to_file(recursive=True)
    program_path = os.path.join('_dotgraphs', 'program.sdfg')
    sdfg.save(program_path)

    # Validate SDFG
    sdfg.validate()

    return sdfg
def _try_to_match_transformation(
        graph: Union[SDFG, SDFGState], collapsed_graph: nx.DiGraph,
        subgraph: Dict[int, int], sdfg: SDFG,
        xform: Type[xf.PatternTransformation], expr_idx: int,
        nxpattern: nx.DiGraph, state_id: int, permissive: bool,
        options: Dict[str, Any]) -> Optional[xf.PatternTransformation]:
    """
    Helper function that tries to instantiate a pattern match into a
    transformation object.

    :return: The constructed transformation if ``can_be_applied`` returned
             a true value; ``None`` if it did not, or if an exception
             occurred while ``optimizer.match_exception`` is not set.
    """
    # Translate the match into {pattern node: node ID in ``graph``}
    node_mapping = {
        nxpattern.nodes[pattern_id]['node']:
        graph.node_id(collapsed_graph.nodes[graph_id]['node'])
        for graph_id, pattern_id in subgraph.items()
    }

    try:
        match = xform(sdfg,
                      sdfg.sdfg_id,
                      state_id,
                      node_mapping,
                      expr_idx,
                      options=options)
        match_found = match.can_be_applied(graph,
                                           expr_idx,
                                           sdfg,
                                           permissive=permissive)
    except Exception as ex:
        if Config.get_bool('optimizer', 'match_exception'):
            raise
        print('WARNING: {p}::can_be_applied triggered a {c} exception:'
              ' {e}'.format(p=xform.__name__,
                            c=ex.__class__.__name__,
                            e=ex))
        return None

    return match if match_found else None
def cmake_options():
    """
    Build the list of CMake ``-D`` options from the ``compiler.xilinx``
    configuration entries.

    ``SDACCEL_ROOT_DIR`` is set to the directory two levels above the
    configured compiler executable.

    :return: List of option strings.
    """
    compiler = make_absolute(Config.get("compiler", "xilinx", "executable"))
    host_flags = Config.get("compiler", "xilinx", "host_flags")
    synthesis_flags = Config.get("compiler", "xilinx", "synthesis_flags")
    build_flags = Config.get("compiler", "xilinx", "build_flags")
    mode = Config.get("compiler", "xilinx", "mode")
    target_platform = Config.get("compiler", "xilinx", "platform")

    if Config.get_bool("compiler", "xilinx", "enable_debugging"):
        enable_debugging = "ON"
    else:
        enable_debugging = "OFF"

    # <root>/<dir>/<executable>  ->  <root>
    sdaccel_root = os.path.dirname(os.path.dirname(compiler))

    return [
        "-DSDACCEL_ROOT_DIR={}".format(sdaccel_root),
        "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
        "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
        "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
        "-DDACE_XILINX_MODE={}".format(mode),
        "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
        "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
    ]
def run_local(self, sdfg: SDFG, driver_file: str):
    """
    Compile ``sdfg`` and run a driver script locally, forwarding everything
    written to stdout/stderr to ``self.show_output`` as well.

    While the script runs, ``compiler.use_cache`` is forced to ``True``.
    The previous cache setting and the original ``sys.stdout`` /
    ``sys.stderr`` are restored on every exit path, including errors.

    :param sdfg: The SDFG to compile.
    :param driver_file: Path of the Python script to run as ``__main__``.
    :raises NotImplementedError: If the generated code uses MPI.
    :raises SystemExit: If the script exits with a failing exit code.
    """
    workdir = sdfg.build_folder
    if Config.get_bool('diode', 'general', 'library_autoexpand'):
        sdfg.expand_library_nodes()
    code_objects = sdfg.generate_code()

    use_mpi = Executor._use_mpi(code_objects)
    # TODO: Implement (instead of pyrun, use mpirun/mpiexec)
    if use_mpi:
        raise NotImplementedError('Running MPI locally unimplemented')

    # Pipe stdout/stderr back to client output
    stdout = sys.stdout
    stderr = sys.stderr
    sys.stdout = FunctionStreamWrapper(self.show_output, stdout.write)
    sys.stderr = FunctionStreamWrapper(self.show_output, stderr.write)

    try:
        # Compile SDFG
        generate_program_folder(sdfg, code_objects, workdir, self._config)
        configure_and_compile(workdir, sdfg.name)

        self.show_output("Running script\n")

        # Run driver script with the compiled SDFG(s) as the default
        old_usecache = Config.get_bool('compiler', 'use_cache')
        Config.set('compiler', 'use_cache', value=True)
        try:
            try:
                runpy.run_path(driver_file, run_name='__main__')
            except SystemExit as ex:
                # Corner case: exit code 0 (or None, which also means
                # success) is not an error. Any other exception simply
                # propagates.
                if ex.code not in (0, None):
                    # "raise" alone will not trigger a printout on the
                    # server, so print the traceback explicitly
                    traceback.print_exc()
                    raise

            self.show_output("Execution Terminated\n")
        finally:
            # Revert configuration
            Config.set('compiler', 'use_cache', value=old_usecache)
    finally:
        # Revert output redirection, even if compiling or running failed
        sys.stdout = stdout
        sys.stderr = stderr
def apply(self, sdfg):
    """
    Remove the first array of the matched pair and redirect its incoming
    edges to the second array.

    Memlets along each incoming memlet path that refer to the removed
    array's data are renamed to the second array's data.

    :param sdfg: SDFG that contains the matched state.
    """
    graph = sdfg.nodes()[self.state_id]
    in_array = graph.nodes()[self.subgraph[RedundantArray._in_array]]
    out_array = graph.nodes()[self.subgraph[RedundantArray._out_array]]

    for in_edge in graph.in_edges(in_array):
        # Modify all incoming edges to point to out_array
        for path_edge in graph.memlet_path(in_edge):
            if path_edge.data.data == in_array.data:
                path_edge.data.data = out_array.data

        # Redirect edge to out_array
        graph.remove_edge(in_edge)
        graph.add_edge(in_edge.src, in_edge.src_conn, out_array,
                       in_edge.dst_conn, in_edge.data)

    # Finally, remove in_array node
    graph.remove_node(in_array)
    if Config.get_bool("debugprint"):
        RedundantArray._arrays_removed += 1
def apply(self, sdfg):
    """
    Bypass copies of the matched chain ``in_array -> med_array -> out_array``.

    For every access node reached from ``med_array`` that refers to the same
    data as ``out_array``, its outgoing edges are re-created from
    ``in_array`` (renaming memlets on their paths) and the node is removed.
    ``med_array`` itself is removed only if it is transient and all of its
    outgoing edges were handled this way.

    :param sdfg: SDFG that contains the matched state.
    """
    def gnode(nname):
        # Resolve a pattern node to the matched node in the state
        return graph.nodes()[self.subgraph[nname]]

    graph = sdfg.nodes()[self.state_id]
    in_array = gnode(RedundantArrayCopying._in_array)
    med_array = gnode(RedundantArrayCopying._med_array)
    out_array = gnode(RedundantArrayCopying._out_array)

    # Total outgoing edges of med_array vs. how many of them were bypassed
    med_edges = len(graph.out_edges(med_array))
    med_out_edges = 0
    for med_e in graph.out_edges(med_array):
        if (isinstance(med_e.dst, nodes.AccessNode)
                and med_e.dst.data == out_array.data):
            # Modify all outcoming edges to point to in_array
            for out_e in graph.out_edges(med_e.dst):
                path = graph.memlet_path(out_e)
                for pe in path:
                    if pe.data.data == out_array.data:
                        pe.data.data = in_array.data
                # Redirect edge to in_array
                graph.remove_edge(out_e)
                graph.add_edge(in_array, out_e.src_conn, out_e.dst,
                               out_e.dst_conn, out_e.data)
            # Remove out_array
            # NOTE(review): the first argument here is the edge ``med_e``,
            # whereas the other edges_between call below passes two nodes;
            # ``med_array`` may have been intended - confirm.
            for e in graph.edges_between(med_e, med_e.dst):
                graph.remove_edge(e)
            graph.remove_node(med_e.dst)
            med_out_edges += 1

    # Finally, med_array node
    if med_array.desc(sdfg).transient and med_edges == med_out_edges:
        for e in graph.edges_between(in_array, med_array):
            graph.remove_edge(e)
        graph.remove_node(med_array)
        if Config.get_bool("debugprint"):
            RedundantArrayCopying._arrays_removed += 1
def render_config_subtree(self, cv, config_path, grid):
    """
    Render one configuration category into ``grid``.

    Child entries whose metadata type is ``'dict'`` are rendered
    recursively on their own notebook page; all other entries are rendered
    into ``grid`` through ``render_config_element``.

    :param cv: Mapping of the category's entries (name to value).
    :param config_path: Sequence of names leading to this category.
    :param grid: Gtk grid to render into.
    """
    # Add notebook to grid and render each child within
    pages = Gtk.Notebook()
    grid.add(pages)
    grid.set_hexpand(True)

    has_columns = False
    for row, (cname, cval) in enumerate(sorted(cv.items())):
        # Create current config "path"
        cpath = tuple(config_path) + (cname, )
        meta = Config.get_metadata(*cpath)

        if meta['type'] != 'dict':
            # The two columns are inserted lazily, before the first element
            if not has_columns:
                grid.insert_column(0)
                grid.insert_column(1)
                has_columns = True
            self.render_config_element(cval, cpath, grid, row, meta)
            continue

        # Nested category: give it a dedicated page
        page_label = Gtk.Label()
        page_label.set_label(meta['title'])
        page_grid = Gtk.Grid()
        pages.append_page(page_grid, page_label)
        self.render_config_subtree(cval, cpath, page_grid)
def setup_env():
    """
    Configure stream usage of ``ONNXRuntimeCUDA`` from the
    ``ORT_USE_STREAMS`` environment variable and the
    ``compiler.cuda.max_concurrent_streams`` configuration entry.

    Sets ``ONNXRuntimeCUDA.use_streams`` and
    ``ONNXRuntimeCUDA.max_concurrent_streams``, and may change the
    configuration entry itself.
    """
    num_concurrent_streams = Config.get("compiler", "cuda",
                                        "max_concurrent_streams")
    if 'ORT_USE_STREAMS' in os.environ:
        ONNXRuntimeCUDA.use_streams = _env2bool(os.environ["ORT_USE_STREAMS"])
        if ONNXRuntimeCUDA.use_streams:
            log.info("Using streams with ORT (experimental)")
            if num_concurrent_streams == 0:
                # A configured value of 0 is replaced by 8 when streams are
                # requested
                log.info("Setting compiler.cuda.max_concurrent_streams to 8")
                Config.set("compiler",
                           "cuda",
                           "max_concurrent_streams",
                           value=8)
            elif num_concurrent_streams == -1:
                # A configured value of -1 overrides the environment request
                ONNXRuntimeCUDA.use_streams = False
    else:
        # Environment variable absent: force the configuration to -1 and
        # turn stream usage off
        if num_concurrent_streams != -1:
            log.info("Setting compiler.cuda.max_concurrent_streams to -1")
            Config.set("compiler", "cuda", "max_concurrent_streams", value=-1)
        ONNXRuntimeCUDA.use_streams = False
    # Re-read the entry, since it may have been changed above
    ONNXRuntimeCUDA.max_concurrent_streams = Config.get(
        "compiler", "cuda", "max_concurrent_streams")
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
import dace
from simple_systolic_array import P, make_sdfg
from dace.config import Config

# Suffix appended to every code node label so that the resulting names
# become too long
KERNEL_NAME = ("_this_is_a_very_long_kernel_name_that_does_not_fit_"
               "in_the_61_character_limit")

if __name__ == "__main__":

    Config.set("compiler", "fpga_vendor", value="intel_fpga")

    sdfg = make_sdfg("name_too_long")

    # Lengthen the label of every code node in the SDFG
    code_nodes = (candidate for candidate, _ in sdfg.all_nodes_recursive()
                  if isinstance(candidate, dace.sdfg.nodes.CodeNode))
    for code_node in code_nodes:
        code_node.label += KERNEL_NAME

    sdfg.specialize({"P": 4})

    # Code generation is expected to fail with NameTooLongError; finishing
    # without it is the error case
    try:
        code = sdfg.generate_code()
    except dace.codegen.targets.intel_fpga.NameTooLongError:
        pass
    else:
        raise RuntimeError("No exception thrown.")