Example #1
0
 def import_input(self) -> None:
     """
     Load the stencil program description found at ``self.path``.

     Fills in ``kernel_dimensions``, ``constants``, ``vectorization``,
     ``program``, ``inputs``, ``outputs`` and ``dimensions``. Programs given
     in one or two dimensions are padded: every ``[`` in each computation
     string receives extra leading indices and ``dimensions`` receives
     leading entries of size 1.
     """
     description = stencilflow.parse_json(self.path)
     problem_size = description["dimensions"]
     self.kernel_dimensions = len(problem_size)

     # optional sections fall back to defaults when absent
     self.constants = (copy.copy(description["constants"])
                       if "constants" in description else {})
     self.vectorization = (int(description["vectorization"])
                           if "vectorization" in description else 1)

     # program, inputs and outputs are taken over as they are
     self.program = description["program"]
     self.inputs = description["inputs"]
     for field in self.inputs.values():
         if "input_dims" in field:
             continue
         if "dimensions" in field:
             field["input_dims"] = field["dimensions"]
         else:
             # default: the trailing `kernel_dimensions` iterator names
             first = len(stencilflow.ITERATORS) - self.kernel_dimensions
             field["input_dims"] = stencilflow.ITERATORS[first:]
     self.outputs = description["outputs"]

     # kernel_dimensions -> (replacement for "[", leading unit dimensions)
     padding = {
         1: ("[i, j,", [1, 1]),  # 1D: two extra indices / dimensions
         2: ("[i,", [1]),  # 2D: one extra index / dimension
     }
     if self.kernel_dimensions in padding:
         index_prefix, unit_dims = padding[self.kernel_dimensions]
         for stencil in self.program.values():
             stencil["computation_string"] = \
                 stencil["computation_string"].replace("[", index_prefix)
         self.dimensions = unit_dims + problem_size
     else:  # 3D: dimensions are used unchanged
         self.dimensions = problem_size
Example #2
0
    def __init__(self,
                 name: str,
                 kernel_string: str,
                 dimensions: List[int],
                 data_type: dace.dtypes.typeclass,
                 boundary_conditions: Dict[str, Dict[str, str]],
                 raw_inputs,
                 vectorization: int = 1,
                 plot_graph: bool = False,
                 verbose: bool = False) -> None:
        """
        Create a kernel: build the computation graph of its stencil
        expression and initialize the state used by the simulator.

        :param name: name of the kernel
        :param kernel_string: mathematical expression representing the stencil computation
        :param dimensions: global dimensions / problem size (i.e. size of the input array)
        :param data_type: data type of the result produced by this kernel
        :param boundary_conditions: dictionary of the boundary condition for each input channel/field
        :param raw_inputs: stored on the kernel and forwarded unchanged to the ComputeGraph
        :param vectorization: stored on the kernel and forwarded unchanged to the ComputeGraph
        :param plot_graph: flag indicating whether the underlying graph is being drawn
        :param verbose: flag for console output logging
        """
        # the superclass gets a placeholder queue of size zero
        super().__init__(name, BoundedQueue(name="dummy", maxsize=0), data_type)

        # constructor arguments
        self.kernel_string: str = kernel_string  # raw kernel string input
        self.raw_inputs = raw_inputs
        # input array dimensions [dimX, dimY, dimZ]
        self.dimensions: List[int] = dimensions
        # boundary_conditions[field_name]
        self.boundary_conditions: Dict[str, Dict[str, str]] = boundary_conditions
        self.verbose = verbose
        self.vectorization = vectorization

        # static parameters are read from the config file
        self.config: Dict = stencilflow.parse_json("kernel.config")
        self.calculator: Calculator = Calculator()

        # initial simulator parameters
        self.all_available = False
        self.not_available = set()

        # analyze the input expression
        compute_graph = ComputeGraph(vectorization=vectorization,
                                     dimensions=dimensions,
                                     raw_inputs=raw_inputs)
        self.graph: ComputeGraph = compute_graph
        # AST computation graph of the mathematical expression
        compute_graph.generate_graph(kernel_string)
        # latency in the computation tree, to find the critical path
        compute_graph.calculate_latency()
        # separate input nodes (field accesses and constant values) from
        # output nodes
        compute_graph.determine_inputs_outputs()
        compute_graph.setup_internal_buffers()
        if plot_graph:
            compute_graph.plot_graph(name + ".png")

        # simulation-specific state
        # var_map[var_name] = current value of that variable
        self.var_map: Dict[str, float] = {}
        # True once a read from all input nodes succeeded (=> ready to execute)
        self.read_success: bool = False
        # True once the execution has been successful
        self.exec_success: bool = False
        # execution result of the current iteration (see program counter)
        self.result: float = float("nan")
        self.outputs: Dict[str, BoundedQueue] = {}

        # output delay queue: simulates the calculation latency and starts
        # out filled with bubbles
        pipeline_depth = self.graph.max_latency
        self.out_delay_queue: BoundedQueue = BoundedQueue(
            name="delay_output",
            maxsize=pipeline_depth + 1,
            collection=[None] * pipeline_depth)

        # internal buffer queues
        self.internal_buffer: Dict[str, BoundedQueue] = {}
        self.setup_internal_buffers()

        # Takes care of the (falsely) executed kernel when there is no field
        # access at [0,0,0]: there might be only out-of-bound fields, so a
        # result is produced although there should not be one yet
        # (see paper example ref# TODO).
        self.dist_to_center: Dict = {}
        self.set_up_dist_to_center()
        self.center_reached = False

        # performance metrics
        self.max_del_buf_usage = {}
        # running sum and count, used for the mean
        self.buf_usage_sum = {}
        self.buf_usage_num = {}
        self.init_metric = False
        # upper bound
        self.PC_exec_start = stencilflow.convert_3d_to_1d(
            dimensions=self.dimensions, index=self.dimensions)
        # lower bound
        self.PC_exec_end = 0
Example #3
0
def run_program(stencil_file,
                mode,
                run_simulation=False,
                compare_to_reference=False,
                input_directory=None,
                use_cached_sdfg=None,
                skip_execution=False,
                generate_input=False,
                synthetic_reads=None,
                specialize_scalars=False,
                plot=False,
                halo=0,
                repetitions=1,
                log_level=LogLevel.BASIC,
                print_result=False):
    """
    Build, compile and run the stencil program described by a JSON file.

    :param stencil_file: path to the program description; the program name
        is its base name without the last extension, with remaining dots
        replaced by underscores
    :param mode: "emulation" or "hardware"
    :param run_simulation: run the Simulator and compare its result with the
        program output (this comparison is only reached when
        compare_to_reference is false)
    :param compare_to_reference: also build and run a reference SDFG, save
        its results and compare the program output against them
    :param input_directory: prefix used when loading input arrays; defaults
        to the directory of stencil_file
    :param use_cached_sdfg: load SDFGs from ".dacecache" instead of
        generating them
    :param skip_execution: return right after compilation
    :param generate_input: set the "data" entry of every input to
        "constant:0.5" instead of using what the description specifies
    :param synthetic_reads: forwarded to generate_sdfg
    :param specialize_scalars: forwarded to generate_sdfg
    :param plot: forwarded to KernelChainGraph as plot_graph
    :param halo: if positive, number of cells cut from both ends of every
        dimension of the output arrays before saving and comparing
    :param repetitions: number of times the program is executed; 0 behaves
        like skip_execution
    :param log_level: verbosity of progress messages, also forwarded to
        KernelChainGraph and Simulator
    :param print_result: print the output arrays (and the reference output
        arrays when compare_to_reference is set)
    :return: None if execution was skipped or no comparison was requested,
        0 if the requested comparison succeeded, 1 if the simulation result
        does not match
    :raises ValueError: if mode is not recognized or the output does not
        match the reference
    """

    # Load program file
    program_description = stencilflow.parse_json(stencil_file)
    # Drop only the last extension. splitext also copes with file names that
    # have no extension, where the former regular expression did not match.
    name = os.path.splitext(os.path.basename(stencil_file))[0]
    name = name.replace(".", "_")

    # Create SDFG
    if log_level >= LogLevel.BASIC:
        print("Creating kernel graph...")
    chain = KernelChainGraph(path=stencil_file,
                             plot_graph=plot,
                             log_level=log_level)

    # do simulation
    if run_simulation:
        if log_level >= LogLevel.BASIC:
            print("Running simulation...")
        sim = Simulator(program_name=name,
                        program_description=program_description,
                        input_nodes=chain.input_nodes,
                        kernel_nodes=chain.kernel_nodes,
                        output_nodes=chain.output_nodes,
                        dimensions=chain.dimensions,
                        write_output=False,
                        log_level=log_level)
        sim.simulate()
        simulation_result = sim.get_result()

    if use_cached_sdfg:
        if log_level >= LogLevel.BASIC:
            print("Loading cached SDFG...")
        sdfg_path = os.path.join(".dacecache", name, "program.sdfg")
        sdfg = dace.SDFG.from_file(sdfg_path)
    else:
        if log_level >= LogLevel.BASIC:
            print("Generating SDFG...")
        sdfg = generate_sdfg(name,
                             chain,
                             synthetic_reads=synthetic_reads,
                             specialize_scalars=specialize_scalars)

    if compare_to_reference:
        if use_cached_sdfg:
            if log_level >= LogLevel.BASIC:
                print("Loading cached reference SDFG...")
            sdfg_path = os.path.join(".dacecache", name + "_reference",
                                     "program.sdfg")
            reference_sdfg = dace.SDFG.from_file(sdfg_path)
        else:
            if log_level >= LogLevel.BASIC:
                print("Generating reference SDFG...")
            reference_sdfg = generate_reference(name + "_reference", chain)

    # Configure and compile SDFG
    dace.config.Config.set("compiler", "fpga_vendor", value="intel_fpga")
    # dace.config.Config.set("compiler", "use_cache", value=True)
    dace.config.Config.set("optimizer", "interface", value="")
    dace.config.Config.set(
        "compiler",
        "intel_fpga",
        "kernel_flags",
        value="-fp-relaxed -cl-no-signed-zeros -no-interleaving=default"
        " -global-ring -duplicate-ring -cl-fast-relaxed-math -cl-single-precision-constant"
    )
    if mode == "emulation":
        dace.config.Config.set("compiler",
                               "intel_fpga",
                               "mode",
                               value="emulator")
    elif mode == "hardware":
        dace.config.Config.set("compiler",
                               "intel_fpga",
                               "mode",
                               value="hardware")
    else:
        raise ValueError("Unrecognized execution mode: {}".format(mode))
    if log_level >= LogLevel.BASIC:
        print("Expanding library nodes...")
    sdfg.expand_library_nodes()
    if log_level >= LogLevel.BASIC:
        print("Compiling SDFG...")
    program = sdfg.compile()
    if compare_to_reference:
        if log_level >= LogLevel.BASIC:
            print("Compiling reference SDFG...")
        reference_sdfg.expand_library_nodes()
        reference_program = reference_sdfg.compile()

    if skip_execution or repetitions == 0:
        if log_level >= LogLevel.BASIC:
            print("Skipping execution and exiting.")
        return

    # Load data from disk
    if log_level >= LogLevel.BASIC:
        print("Loading input arrays...")
    if input_directory is None:
        input_directory = os.path.dirname(stencil_file)
    # Copy every input's description as well, so that overriding "data"
    # below does not write through to program_description.
    input_description = {
        field: dict(description)
        for field, description in program_description["inputs"].items()
    }
    if generate_input:
        # Generate some input so we don't load files off the disk
        for description in input_description.values():
            description["data"] = "constant:0.5"
    input_arrays = stencilflow.load_input_arrays(
        input_description,
        prefix=input_directory,
        shape=program_description["dimensions"])

    # Initialize output arrays
    if log_level >= LogLevel.BASIC:
        print("Initializing output arrays...")
    output_arrays = {
        arr_name: stencilflow.aligned(
            np.zeros(program_description["dimensions"],
                     dtype=program_description["program"][arr_name]
                     ["data_type"].type), 64)
        for arr_name in program_description["outputs"]
    }
    if compare_to_reference:
        reference_output_arrays = copy.deepcopy(output_arrays)

    # Run program; arguments that have a non-empty shape are passed under
    # "<name>_host", everything else under its plain name
    dace_args = {
        (key +
         "_host" if hasattr(val, "shape") and len(val.shape) > 0 else key): val
        for key, val in itertools.chain(input_arrays.items(),
                                        output_arrays.items())
    }
    if repetitions == 1:
        print("Executing DaCe program...")
        program(**dace_args)
        print("Finished running program.")
    else:
        for i in range(repetitions):
            print("Executing repetition {}/{}...".format(i + 1, repetitions))
            program(**dace_args)
            print("Finished running program.")

    if print_result:
        for key, val in output_arrays.items():
            print(key + ":", val)

    # Run reference program
    if compare_to_reference:
        dace_args = {
            key: val
            for key, val in itertools.chain(input_arrays.items(),
                                            reference_output_arrays.items())
        }
        print("Executing reference DaCe program...")
        reference_program(**dace_args)
        print("Finished running program.")

        # The reference arrays only exist in this branch, so printing them
        # has to happen here as well.
        if print_result:
            for key, val in reference_output_arrays.items():
                print(key + ":", val)

    # Write results to file
    output_folder = os.path.join("results", name)
    os.makedirs(output_folder, exist_ok=True)
    if halo > 0:
        # Prune halos
        for k, v in output_arrays.items():
            output_arrays[k] = v[tuple(slice(halo, -halo) for _ in v.shape)]
        if compare_to_reference:
            for k, v in reference_output_arrays.items():
                reference_output_arrays[k] = v[tuple(
                    slice(halo, -halo) for _ in v.shape)]
    stencilflow.save_output_arrays(output_arrays, output_folder)
    print("Results saved to " + output_folder)
    if compare_to_reference:
        reference_folder = os.path.join(output_folder, "reference")
        os.makedirs(reference_folder, exist_ok=True)
        stencilflow.save_output_arrays(reference_output_arrays,
                                       reference_folder)
        print("Reference results saved to " + reference_folder)

    if compare_to_reference:
        print("Comparing to reference SDFG...")
        for outp in output_arrays:
            got = output_arrays[outp]
            expected = reference_output_arrays[outp]
            if not stencilflow.arrays_are_equal(np.ravel(got),
                                                np.ravel(expected)):
                print("Expected: {}".format(expected))
                print("Got:      {}".format(got))
                raise ValueError("Result mismatch.")
        print("Results verified.")
        # NOTE: returning here means the simulation comparison below is not
        # performed when a reference comparison was requested.
        return 0

    # Compare simulation result to fpga result
    if run_simulation:
        print("Comparing results...")
        all_match = True
        for outp in output_arrays:
            print("FPGA result:")
            print("\t{}".format(np.ravel(output_arrays[outp])))
            print("Simulation result:")
            print("\t{}".format(np.ravel(simulation_result[outp])))
            if not stencilflow.arrays_are_equal(
                    np.ravel(output_arrays[outp]),
                    np.ravel(simulation_result[outp])):
                all_match = False
        if all_match:
            print("Results verified.")
            return 0
        else:
            print("Result mismatch.")
            return 1
Example #4
0
 def __init__(self,
              path: str,
              plot_graph: bool = False,
              log_level: LogLevel = LogLevel.NO_LOG) -> None:
     """
     Build the kernel chain graph for the program description at `path`.

     Construction runs every stage in order: read the input file, create
     the kernels, compute their latencies, connect them, compute the delay
     buffer sizes, optionally plot the graph and add the channels. A report
     is printed for moderate and high verbosity levels.

     :param path: path to the input file
     :param plot_graph: flag indicating whether or not to produce the graphical graph representation
     :param log_level: flag for console output logging
     """
     self.log_level: LogLevel = log_level

     def announce(message: str) -> None:
         # progress messages only appear at moderate or higher verbosity
         if self.log_level >= LogLevel.MODERATE:
             print(message)

     announce("Initialize KernelChainGraph.")
     # parameters
     self.path: str = os.path.abspath(path)  # absolute path of the input
     # internal fields
     self.inputs: Dict[str, Dict[str, str]] = {}  # input data
     self.outputs: List[str] = []  # name of the output fields
     self.dimensions: List[int] = []  # global problem size
     # mathematical stencil expressions:
     # program[stencil_name] = stencil expression
     self.program: Dict[str, Dict[str, Dict[str, Dict[str, str]]]] = {}
     self.vectorization = 1  # kernel vectorization width W
     self.kernel_latency = None  # critical path latency
     # each channel is an edge between two nodes
     self.channels: Dict[str, BoundedQueue] = {}
     self.graph: nx.DiGraph = nx.DiGraph()  # data flow graph
     self.input_nodes: Dict[str, Kernel] = {}  # Input nodes of the graph
     self.output_nodes: Dict[str, Kernel] = {}  # Output nodes of the graph
     self.kernel_nodes: Dict[str, Kernel] = {}  # Kernel nodes of the graph
     self.config = stencilflow.parse_json("stencil_chain.config")
     self.name = os.path.splitext(os.path.basename(self.path))[0]  # name
     self.kernel_dimensions = -1  # 2: 2D, 3: 3D
     self.constants = {}

     # trigger all internal calculations, announcing each stage first
     stages = (
         ("Read input config files.", self.import_input),
         ("Create all kernels.", self.create_kernels),
         ("Compute kernel latencies.", self.compute_kernel_latency),
         ("Connect kernels.", self.connect_kernels),
         ("Compute delay buffer sizes.", self.compute_delay_buffer),
     )
     for message, run_stage in stages:
         announce(message)
         run_stage()
     # announced here, performed after the optional plotting below
     announce("Add channels to the graph edges.")

     if plot_graph:
         announce("Plot kernel chain graph.")
         self.plot_graph(self.name + ".png")
         # only the message is emitted; no per-kernel compute graph is
         # plotted here
         announce("Plot computation graph of each kernel.")

     # add all channels (internal buffer and delay buffer) to the edges of
     # the graph
     self.add_channels()

     # sin/cos/tan latency warning, once per kernel whose computation
     # string contains one of these names
     for kernel in self.program:
         computation = self.program[kernel]["computation_string"]
         if any(fn in computation for fn in ("sin", "cos", "tan")):
             print(
                 "Warning: Computation contains sinusoidal functions with experimental latency values."
             )

     # report for moderate and high verbosity levels
     if self.log_level >= LogLevel.MODERATE:
         self.report(self.name)
Example #5
0
        simple test stencil program for debugging

        usage: python3 kernel_chain_graph.py -stencil_file stencils/simulator12.json -plot -simulate -report -log-level 2
    """
    # instantiate the argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument("-stencil_file")
    parser.add_argument("-plot", action="store_true")
    parser.add_argument("-log-level",
                        default=LogLevel.MODERATE.value,
                        type=int)
    parser.add_argument("-report", action="store_true")
    parser.add_argument("-simulate", action="store_true")
    args = parser.parse_args()
    args.log_level = stencilflow.log_level.LogLevel(args.log_level)
    program_description = stencilflow.parse_json(args.stencil_file)
    # instantiate the KernelChainGraph
    chain = KernelChainGraph(path=args.stencil_file,
                             plot_graph=args.plot,
                             log_level=LogLevel(args.log_level))
    # simulate the design if argument -simulate is true
    if args.simulate:
        sim = Simulator(program_name=re.match(
            "[^\.]+", os.path.basename(args.stencil_file)).group(0),
                        program_description=program_description,
                        input_nodes=chain.input_nodes,
                        kernel_nodes=chain.kernel_nodes,
                        output_nodes=chain.output_nodes,
                        dimensions=chain.dimensions,
                        write_output=False,
                        log_level=LogLevel(args.log_level))