def import_input(self) -> None:
    """
    Load the program description found at ``self.path`` and populate this object from it.

    Sets ``kernel_dimensions``, ``constants``, ``vectorization``, ``program``, ``inputs``,
    ``outputs`` and ``dimensions``. Programs with fewer than three dimensions are padded
    to three: the problem size gets leading 1-entries and every ``[`` in each
    computation string gets the matching leading index names inserted after it.
    """
    spec = stencilflow.parse_json(self.path)

    # number of dimensions as written in the input file (before padding)
    self.kernel_dimensions = len(spec["dimensions"])

    # optional sections fall back to an empty constant table / scalar execution
    self.constants = copy.copy(spec["constants"]) if "constants" in spec else {}
    self.vectorization = int(spec.get("vectorization", 1))

    # program, inputs and outputs are taken over as-is
    self.program = spec["program"]
    self.inputs = spec["inputs"]
    for field in self.inputs.values():
        if "input_dims" in field:
            continue
        if "dimensions" in field:
            field["input_dims"] = field["dimensions"]
        else:
            # default: the last `kernel_dimensions` iterator names
            first = len(stencilflow.ITERATORS) - self.kernel_dimensions
            field["input_dims"] = stencilflow.ITERATORS[first:]
    self.outputs = spec["outputs"]

    # index text that replaces every "[" for the lower-dimensional cases
    index_prefix = {1: "[i, j,", 2: "[i,"}.get(self.kernel_dimensions)
    if index_prefix is None:
        # anything that is not 1D or 2D is used unchanged (treated as 3D)
        self.dimensions = spec["dimensions"]
        return

    for stencil in self.program.values():
        stencil["computation_string"] = stencil["computation_string"].replace(
            "[", index_prefix)
    # one extra unit dimension per inserted index
    self.dimensions = [1] * (3 - self.kernel_dimensions) + spec["dimensions"]
def __init__(self,
             name: str,
             kernel_string: str,
             dimensions: List[int],
             data_type: dace.dtypes.typeclass,
             boundary_conditions: Dict[str, Dict[str, str]],
             raw_inputs,
             vectorization: int = 1,
             plot_graph: bool = False,
             verbose: bool = False) -> None:
    """
    Build a kernel from its stencil expression and prepare its simulation state.

    :param name: name of the kernel
    :param kernel_string: mathematical expression representing the stencil computation
    :param dimensions: global dimensions / problem size (i.e. size of the input array)
    :param data_type: data type of the result produced by this kernel
    :param boundary_conditions: dictionary of the boundary condition for each input channel/field
    :param raw_inputs: stored on the kernel and forwarded unchanged to the ComputeGraph
    :param vectorization: stored on the kernel and forwarded unchanged to the ComputeGraph
    :param plot_graph: flag indicating whether the underlying graph is being drawn
    :param verbose: flag for console output logging
    """
    # the superclass gets a placeholder queue of size zero
    placeholder_queue = BoundedQueue(name="dummy", maxsize=0)
    super().__init__(name, placeholder_queue, data_type)

    # constructor arguments
    self.kernel_string: str = kernel_string
    self.raw_inputs = raw_inputs
    self.dimensions: List[int] = dimensions  # [dimX, dimY, dimZ]
    self.boundary_conditions: Dict[str, Dict[str, str]] = boundary_conditions  # keyed by field name
    self.verbose = verbose
    self.vectorization = vectorization

    # static parameters come from the kernel config file
    self.config: Dict = stencilflow.parse_json("kernel.config")
    self.calculator: Calculator = Calculator()

    # initial simulator state
    self.all_available = False
    self.not_available = set()

    # turn the expression into a compute graph and analyse it
    self.graph: ComputeGraph = ComputeGraph(vectorization=vectorization,
                                            dimensions=dimensions,
                                            raw_inputs=raw_inputs)
    compute_graph = self.graph
    compute_graph.generate_graph(kernel_string)  # AST computation graph of the expression
    compute_graph.calculate_latency()  # latency in the computation tree / critical path
    compute_graph.determine_inputs_outputs()  # input nodes (field accesses, constants) and output nodes
    compute_graph.setup_internal_buffers()

    # optionally draw the compute graph to "<name>.png"
    if plot_graph:
        compute_graph.plot_graph(name + ".png")

    # simulation-specific state
    self.var_map: Dict[str, float] = {}  # var_map[var_name] = current var_value
    self.read_success: bool = False  # all input nodes were read (=> ready to execute)
    self.exec_success: bool = False  # the execution has been successful
    self.result: float = float("nan")  # result of the current iteration
    self.outputs: Dict[str, BoundedQueue] = {}

    # output delay queue simulating the calculation latency, pre-filled with bubbles
    self.out_delay_queue: BoundedQueue = BoundedQueue(
        name="delay_output",
        maxsize=self.graph.max_latency + 1,
        collection=[None] * self.graph.max_latency)

    # internal buffer queues
    self.internal_buffer: Dict[str, BoundedQueue] = {}
    self.setup_internal_buffers()

    # Guards against the (falsely) executed kernel when there is no field access at
    # [0,0,0]: only out-of-bound fields might be present, so a result is produced
    # although there should not be one yet (see paper example ref# TODO).
    self.dist_to_center: Dict = {}
    self.set_up_dist_to_center()
    self.center_reached = False

    # performance metrics
    self.max_del_buf_usage = {}
    self.buf_usage_sum = {}  # sum and count are kept for computing the mean
    self.buf_usage_num = {}
    self.init_metric = False
    self.PC_exec_start = stencilflow.convert_3d_to_1d(
        dimensions=self.dimensions, index=self.dimensions)  # upper bound
    self.PC_exec_end = 0  # lower bound
def run_program(stencil_file,
                mode,
                run_simulation=False,
                compare_to_reference=False,
                input_directory=None,
                use_cached_sdfg=None,
                skip_execution=False,
                generate_input=False,
                synthetic_reads=None,
                specialize_scalars=False,
                plot=False,
                halo=0,
                repetitions=1,
                log_level=LogLevel.BASIC,
                print_result=False):
    """
    Build, compile and run a stencil program, optionally verifying its output.

    :param stencil_file: path to the program description (read with stencilflow.parse_json)
    :param mode: "emulation" or "hardware"; selects the Intel FPGA compiler mode
    :param run_simulation: run the Simulator first and compare its result to the program
        output (the comparison is only reached when compare_to_reference is false)
    :param compare_to_reference: also build and run a reference SDFG and compare against it
    :param input_directory: prefix used when loading input arrays; defaults to the
        directory of stencil_file
    :param use_cached_sdfg: load SDFGs from ".dacecache/<name>/program.sdfg" instead of
        generating them
    :param skip_execution: stop after compilation
    :param generate_input: use "constant:0.5" as data of every input instead of the
        data named in the program description
    :param synthetic_reads: forwarded to generate_sdfg
    :param specialize_scalars: forwarded to generate_sdfg
    :param plot: forwarded to KernelChainGraph as plot_graph
    :param halo: if positive, number of cells cut from both ends of every axis of each
        output array before saving and comparing
    :param repetitions: how often the compiled program is executed; 0 skips execution
    :param log_level: progress messages guarded by it are printed from LogLevel.BASIC on
    :param print_result: print the output arrays after execution
    :return: None if execution is skipped or nothing is compared, 0 if the comparison
        succeeded, 1 if the simulation result does not match
    :raises ValueError: on an unrecognized mode or a mismatch with the reference result
    """
    verbose = log_level >= LogLevel.BASIC

    # Load program file
    program_description = stencilflow.parse_json(stencil_file)
    base_name = os.path.basename(stencil_file)
    # Strip the extension; a file name without one is used as-is instead of crashing
    # on a failed match. Remaining dots are replaced by underscores.
    stem = re.match(r"(.+)\.[^.]+", base_name)
    name = (stem.group(1) if stem else base_name).replace(".", "_")

    # Create SDFG
    if verbose:
        print("Creating kernel graph...")
    chain = KernelChainGraph(path=stencil_file,
                             plot_graph=plot,
                             log_level=log_level)

    # do simulation
    if run_simulation:
        if verbose:
            print("Running simulation...")
        sim = Simulator(program_name=name,
                        program_description=program_description,
                        input_nodes=chain.input_nodes,
                        kernel_nodes=chain.kernel_nodes,
                        output_nodes=chain.output_nodes,
                        dimensions=chain.dimensions,
                        write_output=False,
                        log_level=log_level)
        sim.simulate()
        simulation_result = sim.get_result()

    if use_cached_sdfg:
        if verbose:
            print("Loading cached SDFG...")
        sdfg_path = os.path.join(".dacecache", name, "program.sdfg")
        sdfg = dace.SDFG.from_file(sdfg_path)
    else:
        if verbose:
            print("Generating SDFG...")
        sdfg = generate_sdfg(name,
                             chain,
                             synthetic_reads=synthetic_reads,
                             specialize_scalars=specialize_scalars)

    if compare_to_reference:
        if use_cached_sdfg:
            if verbose:
                print("Loading cached reference SDFG...")
            sdfg_path = os.path.join(".dacecache", name + "_reference",
                                     "program.sdfg")
            reference_sdfg = dace.SDFG.from_file(sdfg_path)
        else:
            if verbose:
                print("Generating reference SDFG...")
            reference_sdfg = generate_reference(name + "_reference", chain)

    # Configure and compile SDFG
    dace.config.Config.set("compiler", "fpga_vendor", value="intel_fpga")
    # dace.config.Config.set("compiler", "use_cache", value=True)
    dace.config.Config.set("optimizer", "interface", value="")
    dace.config.Config.set(
        "compiler",
        "intel_fpga",
        "kernel_flags",
        value="-fp-relaxed -cl-no-signed-zeros -no-interleaving=default"
        " -global-ring -duplicate-ring -cl-fast-relaxed-math -cl-single-precision-constant"
    )
    if mode == "emulation":
        dace.config.Config.set("compiler",
                               "intel_fpga",
                               "mode",
                               value="emulator")
    elif mode == "hardware":
        dace.config.Config.set("compiler",
                               "intel_fpga",
                               "mode",
                               value="hardware")
    else:
        raise ValueError("Unrecognized execution mode: {}".format(mode))
    if verbose:
        print("Expanding library nodes...")
    sdfg.expand_library_nodes()
    if verbose:
        print("Compiling SDFG...")
    program = sdfg.compile()
    if compare_to_reference:
        if verbose:
            print("Compiling reference SDFG...")
        reference_sdfg.expand_library_nodes()
        reference_program = reference_sdfg.compile()

    if skip_execution or repetitions == 0:
        if verbose:
            print("Skipping execution and exiting.")
        return

    # Load data from disk
    if verbose:
        print("Loading input arrays...")
    if input_directory is None:
        input_directory = os.path.dirname(stencil_file)
    if generate_input:
        # Generate some input so we don't load files off the disk. Every input gets
        # its own new dict: a shallow copy of the outer dict would share the nested
        # dicts, and writing "data" would then modify program_description itself.
        input_description = {
            field: {
                **description, "data": "constant:0.5"
            }
            for field, description in program_description["inputs"].items()
        }
    else:
        input_description = copy.copy(program_description["inputs"])
    input_arrays = stencilflow.load_input_arrays(
        input_description,
        prefix=input_directory,
        shape=program_description["dimensions"])

    # Initialize output arrays
    if verbose:
        print("Initializing output arrays...")
    output_arrays = {
        arr_name: stencilflow.aligned(
            np.zeros(program_description["dimensions"],
                     dtype=program_description["program"][arr_name]
                     ["data_type"].type), 64)
        for arr_name in program_description["outputs"]
    }
    if compare_to_reference:
        reference_output_arrays = copy.deepcopy(output_arrays)

    # Run program; arguments with a non-empty shape are passed under "<name>_host"
    dace_args = {
        (key + "_host" if hasattr(val, "shape") and len(val.shape) > 0 else key):
        val
        for key, val in itertools.chain(input_arrays.items(),
                                        output_arrays.items())
    }
    if repetitions == 1:
        print("Executing DaCe program...")
        program(**dace_args)
        print("Finished running program.")
    else:
        for i in range(repetitions):
            print("Executing repetition {}/{}...".format(i + 1, repetitions))
            program(**dace_args)
            print("Finished running program.")

    if print_result:
        for key, val in output_arrays.items():
            print(key + ":", val)

    # Run reference program (arguments are passed under their plain names)
    if compare_to_reference:
        dace_args = {
            key: val
            for key, val in itertools.chain(input_arrays.items(),
                                            reference_output_arrays.items())
        }
        print("Executing reference DaCe program...")
        reference_program(**dace_args)
        print("Finished running program.")

        if print_result:
            for key, val in reference_output_arrays.items():
                print(key + ":", val)

    # Write results to file
    output_folder = os.path.join("results", name)
    os.makedirs(output_folder, exist_ok=True)
    if halo > 0:
        # Prune halos: drop `halo` cells at both ends of every axis
        for k, v in output_arrays.items():
            output_arrays[k] = v[tuple(slice(halo, -halo) for _ in v.shape)]
        if compare_to_reference:
            for k, v in reference_output_arrays.items():
                reference_output_arrays[k] = v[tuple(
                    slice(halo, -halo) for _ in v.shape)]
    stencilflow.save_output_arrays(output_arrays, output_folder)
    print("Results saved to " + output_folder)
    if compare_to_reference:
        reference_folder = os.path.join(output_folder, "reference")
        os.makedirs(reference_folder, exist_ok=True)
        stencilflow.save_output_arrays(reference_output_arrays,
                                       reference_folder)
        print("Reference results saved to " + reference_folder)

    if compare_to_reference:
        print("Comparing to reference SDFG...")
        for outp in output_arrays:
            got = output_arrays[outp]
            expected = reference_output_arrays[outp]
            if not stencilflow.arrays_are_equal(np.ravel(got),
                                                np.ravel(expected)):
                print("Expected: {}".format(expected))
                print("Got: {}".format(got))
                raise ValueError("Result mismatch.")
        print("Results verified.")
        return 0

    # Compare simulation result to fpga result
    if run_simulation:
        print("Comparing results...")
        all_match = True
        for outp in output_arrays:
            print("FPGA result:")
            print("\t{}".format(np.ravel(output_arrays[outp])))
            print("Simulation result:")
            print("\t{}".format(np.ravel(simulation_result[outp])))
            if not stencilflow.arrays_are_equal(
                    np.ravel(output_arrays[outp]),
                    np.ravel(simulation_result[outp])):
                all_match = False
        if all_match:
            print("Results verified.")
            return 0
        else:
            print("Result mismatch.")
            return 1
def __init__(self,
             path: str,
             plot_graph: bool = False,
             log_level: LogLevel = LogLevel.NO_LOG) -> None:
    """
    Set up a KernelChainGraph from a program file and run the whole build pipeline.

    :param path: path to the input file
    :param plot_graph: flag indicating whether or not to produce the graphical graph representation
    :param log_level: console verbosity; progress messages and the final report are
        emitted from LogLevel.MODERATE on
    """
    if log_level >= LogLevel.MODERATE:
        print("Initialize KernelChainGraph.")

    def announce(message: str) -> None:
        # progress output only for moderate and high verbosity
        if self.log_level >= LogLevel.MODERATE:
            print(message)

    # parameters
    self.path: str = os.path.abspath(path)  # absolute path of the input file
    self.log_level: LogLevel = log_level

    # internal fields, filled by the pipeline steps below
    self.inputs: Dict[str, Dict[str, str]] = {}  # input data
    self.outputs: List[str] = []  # name of the output fields
    self.dimensions: List[int] = []  # global problem size
    # mathematical stencil expressions: program[stencil_name] = stencil expression
    self.program: Dict[str, Dict[str, Dict[str, Dict[str, str]]]] = {}
    self.vectorization = 1  # kernel vectorization width W
    self.kernel_latency = None  # critical path latency
    self.channels: Dict[str, BoundedQueue] = {}  # each channel is an edge between two nodes
    self.graph: nx.DiGraph = nx.DiGraph()  # data flow graph
    self.input_nodes: Dict[str, Kernel] = {}  # Input nodes of the graph
    self.output_nodes: Dict[str, Kernel] = {}  # Output nodes of the graph
    self.kernel_nodes: Dict[str, Kernel] = {}  # Kernel nodes of the graph
    self.config = stencilflow.parse_json("stencil_chain.config")
    input_file = os.path.basename(self.path)
    self.name = os.path.splitext(input_file)[0]  # file name without extension
    self.kernel_dimensions = -1  # 2: 2D, 3: 3D
    self.constants = {}

    # trigger all internal calculations
    announce("Read input config files.")
    self.import_input()  # read input config file
    announce("Create all kernels.")
    self.create_kernels()  # create all kernels
    announce("Compute kernel latencies.")
    self.compute_kernel_latency()  # compute their latencies
    announce("Connect kernels.")
    self.connect_kernels()  # connect them in the graph
    announce("Compute delay buffer sizes.")
    self.compute_delay_buffer()  # compute the delay buffer sizes
    announce("Add channels to the graph edges.")

    # plot kernel graphs if flag set to true
    if plot_graph:
        announce("Plot kernel chain graph.")
        self.plot_graph(self.name + ".png")  # plot kernel chain graph
        # Per-kernel compute graph plotting is currently disabled; only the
        # message is emitted.
        announce("Plot computation graph of each kernel.")

    # add all channels (internal buffer and delay buffer) to the edges of the graph
    self.add_channels()

    # print sin/cos/tan latency warning (once per affected kernel)
    for stencil in self.program:
        expression = self.program[stencil]["computation_string"]
        if any(func in expression for func in ("sin", "cos", "tan")):
            print(
                "Warning: Computation contains sinusoidal functions with experimental latency values."
            )

    # print report for moderate and high verbosity levels
    if self.log_level >= LogLevel.MODERATE:
        self.report(self.name)
simple test stencil program for debugging usage: python3 kernel_chain_graph.py -stencil_file stencils/simulator12.json -plot -simulate -report -log-level 2 """ # instantiate the argument parser parser = argparse.ArgumentParser() parser.add_argument("-stencil_file") parser.add_argument("-plot", action="store_true") parser.add_argument("-log-level", default=LogLevel.MODERATE.value, type=int) parser.add_argument("-report", action="store_true") parser.add_argument("-simulate", action="store_true") args = parser.parse_args() args.log_level = stencilflow.log_level.LogLevel(args.log_level) program_description = stencilflow.parse_json(args.stencil_file) # instantiate the KernelChainGraph chain = KernelChainGraph(path=args.stencil_file, plot_graph=args.plot, log_level=LogLevel(args.log_level)) # simulate the design if argument -simulate is true if args.simulate: sim = Simulator(program_name=re.match( "[^\.]+", os.path.basename(args.stencil_file)).group(0), program_description=program_description, input_nodes=chain.input_nodes, kernel_nodes=chain.kernel_nodes, output_nodes=chain.output_nodes, dimensions=chain.dimensions, write_output=False, log_level=LogLevel(args.log_level))