Example #1
0
 def index_to_ijk(self, index: List[int]):
     """
     Build the suffix used in generated variable names for a field access.

     The access is flattened with stencilflow.convert_3d_to_1d using
     self.dimensions. A non-negative flat index yields "_<index>", a negative
     one yields "_n<abs(index)>" so that the result contains no minus sign.
     :param index: access (exactly three entries are supported)
     :return: created string
     :raises NotImplementedError: if the access does not have three entries
     """
     # guard clause: only the (default) three-dimensional case is supported
     if len(index) != 3:
         raise NotImplementedError(
             "Method index_to_ijk has not been implemented for |indices|!=3, here: |indices|={}"
             .format(len(index)))
     # collapse the access into its absolute (flat) index
     flat = stencilflow.convert_3d_to_1d(dimensions=self.dimensions,
                                         index=index)
     # encode the sign as a leading 'n' instead of '-'
     if flat >= 0:
         return "_{}".format(flat)
     return "_n{}".format(abs(flat))
Example #2
0
 def set_up_dist_to_center(self):
     """
     Fill self.dist_to_center with one entry per field/channel in self.graph.accesses: the largest access
     of that field (as ordered by max()), flattened to a scalar with stencilflow.convert_3d_to_1d, i.e. the
     distance of the furthest access to the center of the stencil ([0,0,0]).
     """
     for field, field_accesses in self.graph.accesses.items():
         # the furthest access is the maximum of the access list
         far_access = max(field_accesses)
         flat_distance = stencilflow.convert_3d_to_1d(
             dimensions=self.dimensions, index=far_access)
         self.dist_to_center[field] = flat_distance
Example #3
0
 def setup_internal_buffers(self) -> None:
     """
     Create the internal buffers of every field and split them at the field accesses (pipeline model).

     First, duplicate accesses are removed from every entry of self.graph.accesses. Then, for each name in
     self.graph.buffer_size, the accesses are sorted in descending order and self.internal_buffer[name] is
     set to a list of BoundedQueue objects:
       * no access: the list stays empty
       * one access: a single queue of size 1
       * several accesses: one queue per pair of neighbouring accesses, sized by the absolute flattened
         distance between them (pairs with distance 0 produce no queue)
     :return:
     """
     accesses = self.graph.accesses
     # drop duplicate accesses of every field first
     for field in accesses:
         accesses[field] = self.remove_duplicate_accesses(accesses[field])
     # cut the internal buffer of each field into chunks between neighbouring accesses
     for buf_name in self.graph.buffer_size:
         queues: List[BoundedQueue] = []
         self.internal_buffer[buf_name] = queues
         # order the accesses by their relative position (largest first)
         field_accesses = accesses[buf_name]
         field_accesses.sort(reverse=True)
         if len(field_accesses) == 1:
             # a field with a single access still gets one buffer slot
             queues.append(
                 BoundedQueue(name=buf_name, maxsize=1, collection=[None]))
             continue
         # zero accesses produce no pairs, so the list simply stays empty
         for previous, current in zip(field_accesses, field_accesses[1:]):
             # the buffer size is the flattened distance between the two accesses
             offset = stencilflow.list_subtract_cwise(previous, current)
             size = abs(
                 stencilflow.convert_3d_to_1d(index=offset,
                                              dimensions=self.dimensions))
             if size != 0:  # size 0 means two accesses on the same position
                 queues.append(
                     BoundedQueue(name=buf_name,
                                  maxsize=size,
                                  collection=[None] * size))
Example #4
0
    def __init__(self,
                 name: str,
                 kernel_string: str,
                 dimensions: List[int],
                 data_type: dace.dtypes.typeclass,
                 boundary_conditions: Dict[str, Dict[str, str]],
                 raw_inputs,
                 vectorization: int = 1,
                 plot_graph: bool = False,
                 verbose: bool = False) -> None:
        """
        Create a kernel node: store the arguments, build and analyze the computation graph of the kernel
        expression, and initialize the simulation state (queues, buffers and performance counters).

        :param name: name of the kernel
        :param kernel_string: mathematical expression representing the stencil computation
        :param dimensions: global dimensions / problem size (i.e. size of the input array)
        :param data_type: data type of the result produced by this kernel
        :param boundary_conditions: dictionary of the boundary condition for each input channel/field
        :param raw_inputs: stored on the instance and forwarded unchanged to ComputeGraph
        :param vectorization: stored on the instance and forwarded unchanged to ComputeGraph (default 1)
        :param plot_graph: flag indicating whether the underlying graph is being drawn (written to "<name>.png")
        :param verbose: flag for console output logging
        """
        # initialize the superclass with a zero-sized placeholder queue
        super().__init__(name, BoundedQueue(name="dummy", maxsize=0), data_type)
        # store arguments
        self.kernel_string: str = kernel_string  # raw kernel string input
        self.raw_inputs = raw_inputs
        self.dimensions: List[
            int] = dimensions  # input array dimensions [dimX, dimY, dimZ]
        self.boundary_conditions: Dict[str, Dict[
            str, str]] = boundary_conditions  # boundary_conditions[field_name]
        self.verbose = verbose
        self.vectorization = vectorization
        # read static parameters from config
        self.config: Dict = stencilflow.parse_json("kernel.config")
        self.calculator: Calculator = Calculator()
        # set simulator initial parameters
        self.all_available = False
        self.not_available = set()
        # analyze input: the following graph calls are order dependent
        self.graph: ComputeGraph = ComputeGraph(vectorization=vectorization,
                                                dimensions=dimensions,
                                                raw_inputs=raw_inputs)
        self.graph.generate_graph(
            kernel_string
        )  # generate the ast computation graph from the mathematical expression
        self.graph.calculate_latency(
        )  # calculate the latency in the computation tree to find the critical path
        self.graph.determine_inputs_outputs(
        )  # sort out input nodes (field accesses and constant values) and output
        # nodes
        self.graph.setup_internal_buffers()
        # plot the graph to "<name>.png" (if plot is set to True)
        if plot_graph:
            self.graph.plot_graph(name + ".png")
        # init sim specific params
        self.var_map: Dict[str, float] = dict(
        )  # mapping between variable names and its (current) value: var_map[var_name] =
        # var_value
        self.read_success: bool = False  # flag indicating if read has been successful from all input nodes (=> ready
        # to execute)
        self.exec_success: bool = False  # flag indicating if the execution has been successful
        self.result: float = float(
            'nan'
        )  # execution result of current iteration (see program counter)
        self.outputs: Dict[str, BoundedQueue] = dict()
        # output delay queue: for simulation of calculation latency, fill it up with bubbles
        # (capacity is max_latency + 1, pre-filled with max_latency None entries, so one slot stays free)
        self.out_delay_queue: BoundedQueue = BoundedQueue(
            name="delay_output",
            maxsize=self.graph.max_latency + 1,
            collection=[None] * self.graph.max_latency)
        # setup internal buffer queues
        # NOTE(review): setup_internal_buffers() appears to store a *list* of BoundedQueue per key, which
        # would make the annotation below too narrow - confirm
        self.internal_buffer: Dict[str, BoundedQueue] = dict()
        self.setup_internal_buffers()
        # this method takes care of the (falsely) executed kernel in case of not having a field access at [0,0,0]
        # present and the implication that there might be only fields out of bound s.t. there is a result produced,
        # but there should not be a result yet (see paper example ref# TODO)
        self.dist_to_center: Dict = dict()
        self.set_up_dist_to_center()
        self.center_reached = False
        # add performance metric fields
        self.max_del_buf_usage = dict()
        # for mean
        self.buf_usage_sum = dict()
        self.buf_usage_num = dict()
        self.init_metric = False
        # execution window: start is initialized to the flattened problem size, end to 0
        self.PC_exec_start = stencilflow.convert_3d_to_1d(
            dimensions=self.dimensions, index=self.dimensions)  # upper bound
        self.PC_exec_end = 0  # lower bound
Example #5
0
 def iter_comp_tree(self,
                    node: BaseOperationNodeClass,
                    index_relative_to_center=True,
                    replace_negative_index=False,
                    python_syntax=False,
                    flatten_index=True,
                    output_dimensions=None) -> str:
     """
     Iterate through the computation tree in order to generate the kernel string (according to some properties,
     e.g. relative to center or replace negative index).
     :param node: current node in the tree
     :param index_relative_to_center: indication whether the zero index should be at the center of the stencil or
     the furthest element (in the latter case self.graph.max_index of the field is subtracted from the access)
     :param replace_negative_index: replace the negative sign '-' by n in order to create variable names that are
     not being split up by the python expression parser (Calculator); only applies to flattened indices
     :param python_syntax: emit ternaries as "a if c else b" instead of the C++ form "c ? a : b"
     :param flatten_index: if set, field accesses are emitted with their flattened (1D) index, otherwise with
     the multi-dimensional index list
     :param output_dimensions: only used if flatten_index is not set: number of (trailing) index dimensions to
     keep; None keeps the index untouched
     :return: computation string of the subgraph
     :raises ValueError: if a leading index dimension that has to be removed is non-zero
     :raises NotImplementedError: if the node type is not supported
     """

     def recurse(child) -> str:
         # every child is rendered with exactly the same settings as its parent
         return self.iter_comp_tree(child, index_relative_to_center,
                                    replace_negative_index, python_syntax,
                                    flatten_index, output_dimensions)

     # get predecessor list
     pred = list(self.graph.graph.pred[node])
     # differentiate cases for each node type
     if isinstance(node, BinOp):  # binary operation
         # a single predecessor means lhs == rhs
         if len(pred) == 1:
             lhs = rhs = pred[0]
         else:
             lhs, rhs = pred[0], pred[1]
         lhs_str = recurse(lhs)
         rhs_str = recurse(rhs)
         return "({} {} {})".format(lhs_str, node.generate_op_sym(), rhs_str)
     elif isinstance(node, Call):  # function call
         # the argument has to be rendered with the caller's flatten_index/output_dimensions as well
         # (they were previously dropped here, so arguments silently fell back to the defaults)
         expr_str = recurse(pred[0])
         return "{}({})".format(node.name, expr_str)
     elif isinstance(node, (Name, Num)):
         return str(node.name)  # variable name
     elif isinstance(node, Subscript):
         # compute correct indexing according to the flag
         if index_relative_to_center:
             dim_index = node.index
         else:
             dim_index = stencilflow.list_subtract_cwise(
                 node.index, self.graph.max_index[node.name])
         if flatten_index:
             # break down index from 3D (i.e. [X,Y,Z]) to 1D
             # TODO: accesses to inputs that span fewer dimensions ("input_dims") are not remapped here yet,
             # the index is flattened as is
             word_index = stencilflow.convert_3d_to_1d(
                 dimensions=self.dimensions, index=dim_index)
             # replace negative sign if the flag is set
             if replace_negative_index and word_index < 0:
                 return node.name + "[" + "n" + str(abs(word_index)) + "]"
             return node.name + "[" + str(word_index) + "]"
         # keep the multi-dimensional index, restricted to the dimensions the input actually has
         try:
             dim_index = [
                 dim_index[stencilflow.ITERATORS.index(i)]
                 for i in self.inputs[node.name]["input_dims"]
             ]
         except (KeyError, TypeError):
             pass  # input_dim not defined or is None
         # drop surplus leading dimensions; they must not carry an offset
         if output_dimensions is not None and len(
                 dim_index) > output_dimensions:
             surplus = len(dim_index) - output_dimensions
             if any(entry != 0 for entry in dim_index[:surplus]):
                 raise ValueError("Removed used index dimension")
             dim_index = dim_index[surplus:]
         return node.name + str(dim_index)
     elif isinstance(
             node, Ternary
     ):  # ternary operator of the form true_expr if comp else false_expr
         # separate the comparison from the two value branches
         compare = [x for x in pred if type(x) is Compare][0]
         branches = [x for x in pred if type(x) is not Compare]
         lhs, rhs = branches[0], branches[1]
         compare_str = recurse(compare)
         lhs_str = recurse(lhs)
         rhs_str = recurse(rhs)
         if python_syntax:
             return "(({}) if ({}) else ({}))".format(
                 lhs_str, compare_str, rhs_str)
         # C++ ternary operator syntax
         return "(({}) ? ({}) : ({}))".format(compare_str, lhs_str, rhs_str)
     elif isinstance(node, Compare):  # comparison
         lhs_str = recurse(pred[0])
         rhs_str = recurse(pred[1])
         return "{} {} {}".format(lhs_str, str(node.name), rhs_str)
     elif isinstance(node, UnaryOp):  # unary operations e.g. negation
         expr_str = recurse(pred[0])
         return "({}{})".format(node.generate_op_sym(), expr_str)
     else:
         raise NotImplementedError(
             "iter_comp_tree is not implemented for node type {}".format(
                 type(node)))
Example #6
0
 def compute_critical_path(self) -> int:
     """
     Return the max latency critical path through the graph as a scalar: the result of
     compute_critical_path_dim() flattened with stencilflow.convert_3d_to_1d over self.dimensions.
     """
     critical_path_per_dim = self.compute_critical_path_dim()
     return stencilflow.convert_3d_to_1d(index=critical_path_per_dim,
                                         dimensions=self.dimensions)
Example #7
0
 def compute_delay_buffer(self) -> None:
     """
     Computes the delay buffer sizes in the graph by propagating all paths from the input arrays to the successors in
     topological order. Delay buffer entries should be of the format:
         kernel.input_paths: {
             "in1": [[a, b, c, pred1], [d, e, f, pred2], ...],
             "in2": [ ... ],
             ...
         }
     where inX are input arrays to the stencil chain and predY are the kernel predecessors/inputs.

     Side effects visible in this method: node.delay_buffer is (re)created for the visited nodes and
     succ.input_paths of every successor is extended.
     :raises ValueError: if the graph contains a cycle (the message lists the node names of one cycle)
     """
     # get topological order for top-down walk through of the graph
     try:
         order = list(nx.topological_sort(self.graph))
     except nx.exception.NetworkXUnfeasible:
         # report the first cycle found so the user can locate the problem
         cycle = next(nx.algorithms.cycles.simple_cycles(self.graph))
         raise ValueError("Cycle detected: {}".format(
             [c.name for c in cycle]))
     # go through all nodes
     for node in order:
         # process delay buffer (no additional delay buffer will appear because of the topological order)
         for inp in node.input_paths:
             # compute maximum delay size per input (entries are lists, so max() compares them element-wise
             # from the left)
             # NOTE: max() returns the stored entry itself, so the increment below modifies that entry of
             # node.input_paths[inp] in place
             max_delay = max(node.input_paths[inp])
             max_delay[
                 2] += 1  # add an extra delay cycle for the processing in the kernel node
             # NOTE(review): index 2 is hard-coded, i.e. this presumably assumes three dimensions - confirm
             # loop over all inputs and set their size relative to the max size to have data ready at the exact
             # same time
             for entry in node.input_paths[inp]:
                 name = entry[-1]  # last element of an entry is the predecessor name
                 max_size = stencilflow.convert_3d_to_1d(
                     dimensions=self.dimensions,
                     index=stencilflow.list_subtract_cwise(
                         max_delay[:-1], entry[:-1]))
                 node.delay_buffer[name] = BoundedQueue(name=name,
                                                        maxsize=max_size)
                 # pre-fill the queue completely with None entries
                 node.delay_buffer[name].import_data(
                     [None] * node.delay_buffer[name].maxsize)
         # set input node delay buffers to 1 (note: for input nodes delay_buffer becomes a single queue,
         # whereas above it is indexed by predecessor name)
         if isinstance(node, Input):
             node.delay_buffer = BoundedQueue(name=node.name,
                                              maxsize=1,
                                              collection=[None])
         # propagate the path lengths (from input arrays over all ways) to the successors
         for succ in self.graph.successors(node):
             # add input node to all as direct input (=0 delay buffer)
             if isinstance(node, Input):
                 # add empty list dictionary entry for enabling list append()
                 if node.name not in succ.input_paths:
                     succ.input_paths[node.name] = []
                 # path of length zero in every dimension, followed by the name of the input node
                 successor = [0] * len(self.dimensions)
                 successor = successor + [node.name]
                 succ.input_paths[node.name].append(successor)
             # add kernel node to all, but calculate the length first (predecessor + delay + internal, ..)
             elif isinstance(node, Kernel):  # add KERNEL
                 # add latency, internal_buffer, delay_buffer
                 # internal buffer: largest per-field maximum access according to KernelChainGraph.greater,
                 # starting from [0, 0, 0]
                 internal_buffer = [0] * 3
                 for item in node.graph.accesses:
                     internal_buffer = max(
                         node.graph.accesses[item]
                     ) if KernelChainGraph.greater(
                         max(node.graph.accesses[item]),
                         internal_buffer) else internal_buffer
                 # latency
                 latency = self.kernel_nodes[node.name].graph.max_latency
                 # compute delay buffer and create entry
                 for entry in node.input_paths:
                     # the first entry has to initialize the structure
                     if entry not in succ.input_paths:
                         succ.input_paths[entry] = []
                     # compute the actual delay buffer
                     delay_buffer = max(node.input_paths[entry][:])
                     # merge them together (zip stops at the shorter operand; presumably this drops the
                     # trailing predecessor name of delay_buffer - confirm that internal_buffer always
                     # has one element less)
                     total = [
                         i + d if i is not None else d
                         for i, d in zip(internal_buffer, delay_buffer)
                     ]
                     # add the latency too (to the last dimension)
                     total[-1] += latency
                     # the path entry ends with the name of the kernel it passed through
                     total.append(node.name)
                     # add entry to paths
                     succ.input_paths[entry].append(total)
             else:  # NodeType.OUTPUT: do nothing
                 continue