Example #1
    def get_load_stride(self, sdfg: SDFG, state: SDFGState, node: nodes.Node,
                        memlet: dace.Memlet) -> symbolic.SymExpr:
        """Determines the stride of a load/store based on:
            - The memlet subset
            - The array strides
            - The involved SVE loop stride"""

        scope = util.get_sve_scope(sdfg, state, node)
        if scope is None:
            raise NotImplementedError('Not in an SVE scope')

        sve_param = scope.map.params[-1]
        sve_range = scope.map.range[-1]
        sve_sym = dace.symbolic.symbol(sve_param)

        array = sdfg.arrays[memlet.data]

        # 1. Flatten the subset to a 1D-offset (using the array strides)
        offset_1 = memlet.subset.at([0] * len(array.strides), array.strides)

        if not offset_1.has(sve_sym):
            raise util.NotSupportedError("SVE param does not occur in subset")

        # 2. Replace the SVE loop param with its next (possibly strided) value
        offset_2 = offset_1.subs(sve_sym, sve_sym + sve_range[2])

        # 3. The load stride is the difference between both
        stride = (offset_2 - offset_1).simplify()

        return stride
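
A worked example makes the three steps concrete. The following standalone sketch uses plain sympy instead of DaCe's symbolic types; the array strides and loop parameters are made up for illustration and are not taken from the code above.

import sympy

i, j = sympy.symbols('i j')        # j: outer map param, i: hypothetical SVE loop param
strides = (128, 1)                 # hypothetical row-major strides of a 128x128 array

# 1. Flatten the subset A[j, i] to a 1D offset using the array strides
offset_1 = j * strides[0] + i * strides[1]

# 2. Replace the SVE loop param with its next value (loop stride 1 here)
offset_2 = offset_1.subs(i, i + 1)

# 3. The load stride is the difference between both
print(sympy.simplify(offset_2 - offset_1))   # -> 1, so a contiguous svld1/svst1 suffices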
Example #2
    def _BoolOp(self, t):
        if util.only_scalars_involed(self.get_defined_symbols(), *t.values):
            return super()._BoolOp(t)

        types = self.infer(*t.values)

        # Bool ops are nested SVE instructions, so we must make sure they all act on vectors
        for type in types:
            if not isinstance(type, dtypes.vector):
                raise util.NotSupportedError(
                    'Non-vectorizable boolean operation')

        # There can be many t.values, e.g. if
        # x or y or z
        for val in t.values:
            # The last entry doesn't need more nesting
            if val == t.values[-1]:
                self.dispatch(t.values[-1])
                break

            # Binary nesting
            self.write('{}_z({}, '.format(util.BOOL_OP_TO_SVE[t.op.__class__],
                                          self.pred_name))
            self.dispatch(val)
            self.write(', ')

        # Close all except the last bracket (because the last entry isn't nested)
        self.write(')' * (len(t.values) - 1))
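
To make the nesting concrete, here is a self-contained string-level sketch of the same scheme: for `x or y or z` it yields svorr_z(pg, x, svorr_z(pg, y, z)). The operand names, predicate, and the svorr_z mapping are placeholders rather than values read from util.BOOL_OP_TO_SVE.

def nest_bool_op(sve_op: str, pred: str, operands: list) -> str:
    """Binary-nest an n-ary boolean op into SVE predicate instructions."""
    parts = [f'{sve_op}({pred}, {val}, ' for val in operands[:-1]]
    parts.append(operands[-1])
    parts.append(')' * (len(operands) - 1))
    return ''.join(parts)

print(nest_bool_op('svorr_z', '__pg_i', ['x', 'y', 'z']))
# -> svorr_z(__pg_i, x, svorr_z(__pg_i, y, z))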
Example #3
File: codegen.py Project: am-ivanov/dace
    def copy_memory(self, sdfg: SDFG, dfg: SDFGState, state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                    edge: gr.MultiConnectorEdge[mm.Memlet], function_stream: CodeIOStream,
                    callsite_stream: CodeIOStream) -> None:

        # Check whether it is a known reduction that is possible in SVE
        reduction_type = detect_reduction_type(edge.data.wcr)
        if reduction_type not in util.REDUCTION_TYPE_TO_SVE:
            raise util.NotSupportedError('Unsupported reduction in SVE')

        nc = not is_write_conflicted(dfg, edge)
        desc = edge.src.desc(sdfg)
        if not nc or not isinstance(desc.dtype, (dtypes.pointer, dtypes.vector)):
            # WCR on vectors works in two steps:
            # 1. Reduce the SVE register using SVE instructions into a scalar
            # 2. WCR the scalar to memory using DaCe functionality
            wcr = self.cpu_codegen.write_and_resolve_expr(sdfg, edge.data, not nc, None, '@', dtype=desc.dtype)
            callsite_stream.write(wcr[:wcr.find('@')] + util.REDUCTION_TYPE_TO_SVE[reduction_type] +
                                  f'(svptrue_{util.TYPE_TO_SVE_SUFFIX[desc.dtype]}(), ' + src_node.label +
                                  wcr[wcr.find('@') + 1:] + ');')
            return
        else:
            ######################
            # Horizontal non-atomic reduction
            raise NotImplementedError()

        return super().copy_memory(sdfg, dfg, state_id, src_node, dst_node, edge, function_stream, callsite_stream)
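
The '@' placeholder splice above can be illustrated in isolation: write_and_resolve_expr renders the WCR with '@' where the reduced value belongs, and the SVE horizontal reduction is inserted at that position. The wcr string, reduction name, and register below are made-up placeholders, not DaCe's actual output.

wcr = 'dace::wcr_custom<double>::reduce(out, @)'   # hypothetical WCR expression
reduction = 'svaddv'                               # hypothetical horizontal reduction
spliced = (wcr[:wcr.find('@')] + reduction +
           '(svptrue_b64(), acc' + wcr[wcr.find('@') + 1:] + ');')
print(spliced)
# -> dace::wcr_custom<double>::reduce(out, svaddv(svptrue_b64(), acc));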
Example #4
    def _Call(self, t):
        res_type = self.infer(t)[0]
        if not res_type:
            raise util.NotSupportedError('Unsupported call')

        if not isinstance(res_type, dtypes.vector):
            # Call does not involve any vectors (to our knowledge)
            # Replace default modules (e.g., math) with dace::math::
            attr_name = astutils.rname(t)
            module_name = attr_name[:attr_name.rfind(".")]
            func_name = attr_name[attr_name.rfind(".") + 1:]
            if module_name not in dtypes._ALLOWED_MODULES:
                raise NotImplementedError(
                    f'Module {module_name} is not implemented')
            cpp_mod_name = dtypes._ALLOWED_MODULES[module_name]
            name = cpp_mod_name + func_name

            self.write(name)
            self.write('(')

            comma = False
            for e in t.args:
                if comma:
                    self.write(", ")
                else:
                    comma = True
                self.dispatch(e)
            self.write(')')
            return

        name = None
        if isinstance(t.func, ast.Name):
            # Could be an internal operation (provided by the preprocessor)
            if not util.is_sve_internal(t.func.id):
                raise NotImplementedError(
                    f'Function {t.func.id} is not implemented')
            name = util.internal_to_external(t.func.id)[0]
        elif isinstance(t.func, ast.Attribute):
            # Some module function (xxx.xxx), make sure it is available
            name = util.MATH_FUNCTION_TO_SVE.get(astutils.rname(t.func))
            if name is None:
                raise NotImplementedError(
                    f'Function {astutils.rname(t.func)} is not implemented')

        # Vectorized function
        self.write('{}_x({}, '.format(name, self.pred_name))
        comma = False
        for e in t.args:
            if comma:
                self.write(", ")
            else:
                comma = True
            self.dispatch_expect(e, res_type)
        self.write(')')
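
In the vector branch, the emitted call always has the shape <name>_x(<predicate>, <args...>). A minimal string-level sketch with a made-up intrinsic name and predicate:

def emit_sve_call(name: str, pred: str, args: list) -> str:
    """Render a predicated SVE call of the form <name>_x(<pred>, <args...>)."""
    return '{}_x({}, {})'.format(name, pred, ', '.join(args))

print(emit_sve_call('svmax', '__pg_i', ['a', 'b']))   # -> svmax_x(__pg_i, a, b)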
Example #5
def vectorize_connector(sdfg: dace.SDFG, dfg: dace.SDFGState,
                        node: dace.nodes.Node, par: str, conn: str,
                        is_input: bool):
    edges = get_connector_edges(dfg, node, conn, is_input)
    connectors = node.in_connectors if is_input else node.out_connectors

    for edge in edges:
        if edge.data.data is None:
            # Empty memlets
            return

        desc = sdfg.arrays[edge.data.data]

        if isinstance(desc, data.Stream):
            # Streams are treated differently in SVE: instead of pointers, they become vectors of unknown size
            connectors[conn] = dace.dtypes.vector(connectors[conn].base_type,
                                                  -1)
            return

        if isinstance(connectors[conn],
                      (dace.dtypes.vector, dace.dtypes.pointer)):
            # No need for vectorization
            return

        subset = edge.data.subset

        sve_dim = None
        for i, rng in enumerate(subset):
            for expr in rng:
                if symbolic.symbol(par) in symbolic.pystr_to_symbolic(
                        expr).free_symbols:
                    if sve_dim is not None and sve_dim != i:
                        raise util.NotSupportedError(
                            'Non-vectorizable memlet (loop param occurs in more than one dimension)'
                        )
                    sve_dim = i

        if sve_dim is None and edge.data.wcr is None:
            # Should stay scalar
            return

        if sve_dim is not None:
            sve_subset = subset[sve_dim]
            edge.data.subset[sve_dim] = (sve_subset[0],
                                         sve_subset[0] + util.SVE_LEN,
                                         sve_subset[2])

        connectors[conn] = dace.dtypes.vector(
            connectors[conn].type or desc.dtype, util.SVE_LEN)
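
The dimension scan in the middle of this function can be reproduced without DaCe by checking each (begin, end, step) triple of a subset for the loop parameter's free symbol; the subset and parameter below are illustrative only.

import sympy

par = sympy.Symbol('i')
# Hypothetical 2D subset [0:0, i:i] given as (begin, end, step) triples
subset = [(0, 0, 1), (par, par, 1)]

sve_dim = None
for dim, rng in enumerate(subset):
    if any(par in sympy.sympify(expr).free_symbols for expr in rng):
        if sve_dim is not None and sve_dim != dim:
            raise ValueError('loop param occurs in more than one dimension')
        sve_dim = dim

print(sve_dim)   # -> 1, so only that dimension's range gets widened by SVE_LEN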
Example #6
File: codegen.py Project: carljohnsen/dace
    def generate_out_register(self,
                              sdfg: SDFG,
                              state: SDFGState,
                              edge: graph.MultiConnectorEdge[mm.Memlet],
                              code: CodeIOStream,
                              use_data_name: bool = False) -> bool:
        """
            Responsible for generating temporary out registers in a Tasklet, given an outgoing edge.
            Returns `True` if a writeback of this register is needed.
        """
        if edge.src_conn is None:
            return False

        dst_node = state.memlet_path(edge)[-1].dst

        src_type = edge.src.out_connectors[edge.src_conn]
        src_name = edge.src_conn

        if use_data_name:
            src_name = edge.data.data

        if isinstance(dst_node, nodes.AccessNode) and isinstance(
                dst_node.desc(sdfg), data.Stream):
            # Streams don't need writeback and are treated differently
            self.stream_associations[edge.src_conn] = (edge.data.data,
                                                       src_type.base_type)
            return False
        elif edge.data.wcr is not None:
            # WCR is addressed within the unparser to capture conditionals
            self.wcr_associations[edge.src_conn] = (dst_node, edge,
                                                    src_type.base_type)
            return False

        # Create temporary registers
        ctype = None
        if util.is_vector(src_type):
            ctype = util.TYPE_TO_SVE[src_type.type]
        elif util.is_scalar(src_type):
            ctype = src_type.ctype
        else:
            raise util.NotSupportedError(
                'Unsupported Code->Code edge (pointer)')

        self.dispatcher.defined_vars.add(src_name, DefinedType.Scalar, ctype)
        code.write(f'{ctype} {src_name};')

        return True
Example #7
    def create_empty_definition(self,
                                conn: dace.typeclass,
                                edge: gr.MultiConnectorEdge[mm.Memlet],
                                callsite_stream: CodeIOStream,
                                output: bool = False,
                                is_code_code: bool = False):
        """ Creates a simple variable definition `type name;`, which works for both vectors and regular data types. """

        var_name = None
        var_type = None
        var_ctype = None

        if output:
            var_name = edge.dst_conn
        else:
            var_name = edge.src_conn

        if is_code_code:
            # For edges between Tasklets (Code->Code), we use the data as name because these registers are temporary and shared
            var_name = edge.data.data

        if isinstance(conn, dtypes.vector):
            # Creates an SVE register

            if conn.type not in util.TYPE_TO_SVE:
                raise util.NotSupportedError('Data type not supported')

            # In case of a WCR, we must initialize it with the identity value.
            # This is to prevent cases in a conditional WCR, where we don't write and it is filled with garbage.
            # Currently, the initial value is 0, because product reduction is not supported in SVE.
            init_str = ''
            if edge.data.wcr:
                init_str = ' = {}(0)'.format(util.instr('dup', type=conn.type))

            var_type = conn.type
            var_ctype = util.TYPE_TO_SVE[var_type]

            callsite_stream.write('{} {}{};'.format(var_ctype, var_name,
                                                    init_str))
        else:
            raise NotImplementedError(
                f'Output into scalar or pointer is not supported ({var_name})')

        self.dispatcher.defined_vars.add(var_name, var_type, var_ctype)
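
For a hypothetical float64 WCR output, the emitted declaration would therefore look as below; the variable name is a placeholder, and svdup_f64 is what one would expect util.instr('dup', ...) to produce for that type, assuming the usual svdup_<suffix> naming.

var_ctype, var_name = 'svfloat64_t', '__out'      # hypothetical output register
init_str = ' = svdup_f64(0)'                      # identity value for a sum reduction
print('{} {}{};'.format(var_ctype, var_name, init_str))
# -> svfloat64_t __out = svdup_f64(0);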
Example #8
    def _Assign(self, t):
        if len(t.targets) > 1:
            raise util.NotSupportedError('Tuple output not supported')

        target = t.targets[0]
        if isinstance(target,
                      ast.Name) and target.id in self.stream_associations:
            # Assigning to a stream variable is equivalent to a push
            self.push_to_stream(t, target)
            return
        elif isinstance(target,
                        ast.Name) and target.id in self.wcr_associations:
            # Assigning to a WCR output
            self.resolve_conflict(t, target)
            return

        lhs_type, rhs_type = self.infer(target, t.value)

        if rhs_type is None:
            raise NotImplementedError(
                f'Can not infer RHS of assignment ({astunparse.unparse(t.value)})'
            )

        is_new_variable = False

        if lhs_type is None:
            # The LHS could involve a variable name that was not declared (which is why inference fails)
            if not isinstance(
                    target,
                    ast.Name) or target.id in self.get_defined_symbols():
                # Either we don't assign to a name, or the variable name has
                # already been declared (but infer still fails, i.e. something went wrong!)
                raise NotImplementedError('Can not infer LHS of assignment')

            # Declare it as `type name`
            lhs_type = rhs_type
            if isinstance(rhs_type, dtypes.vector):
                # SVE register is possible (declare it as svXXX_t)
                self.fill(util.TYPE_TO_SVE[rhs_type.type])
                self.write(' ')
                # Define the new symbol as vector
                self.defined_symbols.update({target.id: rhs_type})
            elif isinstance(rhs_type, dtypes.pointer):
                raise util.NotSupportedError(
                    'Defining pointers in Tasklet code not supported')

            # Otherwise, the fallback will grab the case of a scalar,
            # because the RHS is scalar, and the LHS is the same
            is_new_variable = True

        # LHS and RHS types are now both well defined
        lhs_vec = isinstance(lhs_type, dtypes.vector)
        rhs_vec = isinstance(rhs_type, dtypes.vector)

        # TODO: This is only bad if we assign to a variable from an outer scope
        """
        if self.if_depth > 0 and not lhs_vec:
            raise util.NotSupportedError(
                'Assignments in an if block must be to a vector or stream (otherwise not vectorizable)')
        """

        if not lhs_vec and not rhs_vec:
            # Simple scalar-scalar assign handled by fallback
            super()._Assign(t)
            if isinstance(target, ast.Name):
                self.defined_symbols.update({target.id: rhs_type})
            return

        if not is_new_variable:
            # Indentation fix
            self.fill()

        # Some vector assignment
        self.dispatch(target)
        self.write(' = ')

        # Note that if this variable is declared on the same line, we
        # don't need to select at all (there is nothing to select from,
        # because it was just declared)
        if self.if_depth > 0 and not is_new_variable:
            # If we are in an If block, we assign based on the predicate
            # In case of "a = b", we do:
            # a = select(if_pred, b, a)
            self.write(f'svsel({self.pred_name}, ')

        self.dispatch_expect(t.value, lhs_type)

        if self.if_depth > 0 and not is_new_variable:
            # Close the select
            self.write(', ')
            self.dispatch(target)
            self.write(')')

        self.write(';')
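
Inside an if block, the assignment a = b is thus rendered as a masked select. A string-level sketch with placeholder names:

pred, target, value = '__pg_if', 'a', 'b'   # hypothetical if-predicate and operands
print(f'{target} = svsel({pred}, {value}, {target});')
# -> a = svsel(__pg_if, b, a);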
Example #9
    def vector_reduction_expr(self, edge, dtype, rhs):
        # Check whether it is a known reduction that is possible in SVE
        reduction_type = detect_reduction_type(edge.data.wcr)
        if reduction_type not in util.REDUCTION_TYPE_TO_SVE:
            raise util.NotSupportedError('Unsupported reduction in SVE')

        nc = not is_write_conflicted(self.dfg, edge)
        if not nc or not isinstance(edge.src.out_connectors[edge.src_conn],
                                    (dtypes.pointer, dtypes.vector)):
            # WCR on vectors works in two steps:
            # 1. Reduce the SVE register using SVE instructions into a scalar
            # 2. WCR the scalar to memory using DaCe functionality
            dst_node = self.dfg.memlet_path(edge)[-1].dst
            if (isinstance(dst_node, nodes.AccessNode) and dst_node.desc(
                    self.sdfg).storage == dtypes.StorageType.SVE_Register):
                return

            wcr = self.cpu_codegen.write_and_resolve_expr(self.sdfg,
                                                          edge.data,
                                                          not nc,
                                                          None,
                                                          '@',
                                                          dtype=dtype)
            self.fill(wcr[:wcr.find('@')])
            self.write(util.REDUCTION_TYPE_TO_SVE[reduction_type])
            self.write('(')
            self.write(self.pred_name)
            self.write(', ')
            self.dispatch_expect(rhs, dtypes.vector(dtype, -1))
            self.write(')')
            self.write(wcr[wcr.find('@') + 1:])
            self.write(';')
        else:
            ######################
            # Horizontal non-atomic reduction

            stride = edge.data.get_stride(self.sdfg, self.map)

            # long long fix
            ptr_cast = ''
            src_type = edge.src.out_connectors[edge.src_conn]

            if src_type.type == np.int64:
                ptr_cast = '(int64_t*) '
            elif src_type.type == np.uint64:
                ptr_cast = '(uint64_t*) '

            store_args = '{}, {}'.format(
                self.pred_name,
                ptr_cast +
                cpp_ptr_expr(self.sdfg, edge.data, DefinedType.Pointer),
            )

            red_type = util.REDUCTION_TYPE_TO_SVE[reduction_type][:-1] + '_x'
            if stride == 1:
                self.write(
                    f'svst1({store_args}, {red_type}({self.pred_name}, svld1({store_args}), '
                )
                self.dispatch_expect(rhs, dtypes.vector(dtype, -1))
                self.write('));')
            else:
                store_args = f'{store_args}, svindex_s{util.get_base_type(src_type).bytes * 8}(0, {sym2cpp(stride)})'
                self.write(
                    f'svst1_scatter_index({store_args}, {red_type}({self.pred_name}, svld1_gather_index({store_args}), '
                )
                self.dispatch_expect(rhs, dtypes.vector(dtype, -1))
                self.write('));')
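
The name manipulation in the non-atomic branch deserves spelling out: the horizontal reduction intrinsic loses its trailing 'v' and gains the '_x' predication suffix, yielding the lane-wise op used in the read-modify-write store. The mapping below is a made-up stand-in for util.REDUCTION_TYPE_TO_SVE.

reduction_to_sve = {'Sum': 'svaddv', 'Max': 'svmaxv'}   # hypothetical stand-in mapping
red_type = reduction_to_sve['Sum'][:-1] + '_x'          # 'svaddv' -> 'svadd_x'
print(f'svst1(pg, ptr, {red_type}(pg, svld1(pg, ptr), rhs));')
# -> svst1(pg, ptr, svadd_x(pg, svld1(pg, ptr), rhs));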
Example #10
    def dispatch_expect(self, tree: ast.AST, expect: dtypes.typeclass):
        """
        This function extends the dispatch() call: it allows passing the type
        that is expected when unparsing the tree and takes care of any casting
        that might be required. It is mainly used for SVE instructions in cases
        where an argument must be of a particular type (otherwise the compiler
        complains).
        """

        inf = self.infer(tree)[0]

        # Sanity check
        if not inf:
            raise util.NotSupportedError(
                f'Could not infer the expression type of `{astunparse.unparse(tree)}`'
            )

        if isinstance(inf, dtypes.vector):
            # Unparsing a vector
            if isinstance(expect, dtypes.vector):
                # A vector is expected
                if inf.vtype.type == expect.vtype.type:
                    # No cast required
                    self.dispatch(tree)
                else:
                    # TODO: Cast vectors (but only if same bitwidth)
                    raise NotImplementedError(
                        'Vector-vector casting not implemented')
            else:
                # A pointer or scalar is expected (incompatible)
                raise util.NotSupportedError(
                    'Given a vector, expected a scalar or pointer')
        elif isinstance(inf, dtypes.pointer):
            # Unparsing a pointer
            if isinstance(expect, dtypes.pointer):
                # Expecting a pointer
                if inf.base_type.type == expect.base_type.type:
                    # No cast required, except for `long long` fix
                    if expect.base_type.type == np.int64:
                        self.write('(int64_t*) ')
                    if expect.base_type.type == np.uint64:
                        self.write('(uint64_t*) ')
                    self.dispatch(tree)
                else:
                    raise util.NotSupportedError('Inconsistent pointer types')
            else:
                # Expecting anything else
                raise util.NotSupportedError(
                    'Given a pointer, expected a scalar or vector')
        else:
            # Unparsing a scalar
            if isinstance(expect, dtypes.vector):
                # Expecting a vector: duplicate the scalar
                if expect.type in [np.bool_, bool]:
                    # Special case for duplicating boolean into predicate
                    suffix = f'b{self.pred_bits}'
                    #self.write(f'svptrue_{suffix}()')
                    self.dispatch_expect(tree, expect.base_type)
                    self.write(f' ? svptrue_{suffix}() : svpfalse_b()')
                else:
                    self.write(
                        f'svdup_{util.TYPE_TO_SVE_SUFFIX[expect.type]}(')
                    self.dispatch_expect(tree, expect.base_type)
                    self.write(')')

            elif isinstance(expect, dtypes.pointer):
                # Expecting a pointer
                raise util.NotSupportedError(
                    'Given a scalar, expected a pointer')
            else:
                # Expecting a scalar: cast if needed
                cast_ctype = None
                if inf.type != expect.type:
                    cast_ctype = expect.ctype

                # Special casting for `long long`
                if expect.type == np.int64:
                    cast_ctype = 'int64_t'
                elif expect.type == np.uint64:
                    cast_ctype = 'uint64_t'

                if cast_ctype:
                    self.write(f'({cast_ctype}) ')

                self.dispatch(tree)
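
As an example of the scalar branch: a boolean scalar expected as a predicate is expanded into a ternary between svptrue and svpfalse, while any other scalar is wrapped in an svdup of the expected suffix. A string-level sketch with placeholder expressions:

pred_bits, scalar_expr = 64, '(a < b)'     # hypothetical predicate width and expression
print(f'{scalar_expr} ? svptrue_b{pred_bits}() : svpfalse_b()')
# -> (a < b) ? svptrue_b64() : svpfalse_b()

suffix, scalar_expr = 'f64', 'alpha'       # non-boolean case: duplicate into all lanes
print(f'svdup_{suffix}({scalar_expr})')
# -> svdup_f64(alpha)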
Example #11
    def write_back(self, sdfg: SDFG, dfg: state.StateSubgraphView,
                   state_id: int, src_node: nodes.Node, dst_node: nodes.Node,
                   edge: graph.MultiConnectorEdge,
                   function_stream: CodeIOStream,
                   callsite_stream: CodeIOStream):
        scope = util.get_sve_scope(sdfg, dfg, src_node)
        if scope is None:
            raise NotImplementedError('Not in an SVE scope')

        out_conn = src_node.out_connectors[edge.src_conn]
        if out_conn.type not in util.TYPE_TO_SVE:
            raise NotImplementedError(
                f'Data type {out_conn.type} not supported')

        if edge.data.wcr is None:
            # No WCR required

            if isinstance(dst_node, dace.nodes.Tasklet):
                # Writeback into a tasklet is just writing into the shared register
                callsite_stream.write(f'{edge.data.data} = {edge.src_conn};')
                return

            if isinstance(out_conn, dtypes.vector):
                # If no WCR, we can directly store the vector (SVE register) in memory
                # Determine the stride of the store and use a scatter load if applicable

                stride = self.get_load_stride(sdfg, dfg, src_node, edge.data)

                ptr_cast = ''
                if out_conn.type == np.int64:
                    ptr_cast = '(int64_t*) '
                elif out_conn.type == np.uint64:
                    ptr_cast = '(uint64_t*) '

                store_args = '{}, {}'.format(
                    util.get_loop_predicate(sdfg, dfg, src_node),
                    ptr_cast +
                    cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer),
                )

                if stride == 1:
                    callsite_stream.write(
                        f'svst1({store_args}, {edge.src_conn});')
                else:
                    callsite_stream.write(
                        f'svst1_scatter_index({store_args}, svindex_s{util.get_base_type(out_conn).bytes * 8}(0, {sym2cpp(stride)}), {edge.src_conn});'
                    )
            else:
                raise NotImplementedError('Writeback into non-vector')
        else:
            # TODO: Check what we are WCR'ing into

            # Since we have WCR, we must determine a suitable SVE reduce instruction
            # Check whether it is a known reduction that is possible in SVE
            reduction_type = detect_reduction_type(edge.data.wcr)
            if reduction_type not in util.REDUCTION_TYPE_TO_SVE:
                raise util.NotSupportedError('Unsupported reduction in SVE')

            # If the memlet contains the innermost SVE param, we have a problem, because
            # SVE doesn't support WCR stores. This would require unrolling the loop.
            if scope.params[-1] in edge.data.free_symbols:
                raise util.NotSupportedError(
                    'SVE loop param used in WCR memlet')

            # WCR on vectors works in two steps:
            # 1. Reduce the SVE register using SVE instructions into a scalar
            # 2. WCR the scalar to memory using DaCe functionality

            sve_reduction = '{}({}, {})'.format(
                util.REDUCTION_TYPE_TO_SVE[reduction_type],
                util.get_loop_predicate(sdfg, dfg, src_node), edge.src_conn)

            ptr_cast = ''
            if out_conn.type == np.int64:
                ptr_cast = '(long long*) '
            elif out_conn.type == np.uint64:
                ptr_cast = '(unsigned long long*) '

            wcr_expr = self.cpu_codegen.write_and_resolve_expr(
                sdfg,
                edge.data,
                edge.data.wcr_nonatomic,
                None,
                ptr_cast + sve_reduction,
                dtype=out_conn.vtype)

            callsite_stream.write(wcr_expr + ';')
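
For a hypothetical int32 register __out, predicate __pg_i, and pointer expression &A[i], the two store forms chosen in the no-WCR path look as follows (all names are placeholders):

pred, ptr, reg, elem_bits = '__pg_i', '&A[i]', '__out', 32
store_args = f'{pred}, {ptr}'

for stride in (1, 4):
    if stride == 1:
        print(f'svst1({store_args}, {reg});')
    else:
        print(f'svst1_scatter_index({store_args}, svindex_s{elem_bits}(0, {stride}), {reg});')
# -> svst1(__pg_i, &A[i], __out);
# -> svst1_scatter_index(__pg_i, &A[i], svindex_s32(0, 4), __out);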
Example #12
    def copy_memory(self, sdfg: SDFG, dfg: SDFGState, state_id: int,
                    src_node: nodes.Node, dst_node: nodes.Node,
                    edge: gr.MultiConnectorEdge[mm.Memlet],
                    function_stream: CodeIOStream,
                    callsite_stream: CodeIOStream):
        # We should always be in an SVE scope
        scope = util.get_sve_scope(sdfg, dfg, dst_node)
        if scope is None:
            raise NotImplementedError('Not in an SVE scope')

        in_conn = dst_node.in_connectors[edge.dst_conn]

        if isinstance(src_node, dace.nodes.Tasklet):
            # Copy from tasklet is just copying the shared register
            # Use defined_vars to get the C++ type of the shared register
            callsite_stream.write(
                f'{self.dispatcher.defined_vars.get(edge.data.data)[1]} {edge.dst_conn} = {edge.data.data};'
            )
            return

        if not isinstance(src_node, dace.nodes.AccessNode):
            raise util.NotSupportedError(
                'Copy neither from Tasklet nor AccessNode')

        src_desc = src_node.desc(sdfg)

        if isinstance(src_desc, dace.data.Stream):
            # A copy from a stream will trigger a vector pop
            raise NotImplementedError()

            # FIXME: Issue when we can pop different amounts of data!
            # If we limit to the smallest amount, certain data will be lost (never processed)
            """
            # SVE register where the stream will be popped to
            self.create_empty_definition(in_conn, edge, callsite_stream, output=True)

            var_name = edge.dst_conn

            callsite_stream.write(
                f'{util.TYPE_TO_SVE[in_conn.type]} {var_name};')

            callsite_stream.write('{')
            callsite_stream.write('// Stream pop')

            # Pop into local buffer
            # 256 // in_conn.vtype.bytes
            n_vec = f'{util.REGISTER_BYTE_SIZE} / {in_conn.vtype.bytes}'
            callsite_stream.write(f'{in_conn.vtype.ctype} __tmp[{n_vec}];')
            callsite_stream.write(
                f'size_t __cnt = {edge.data.data}.pop_try(__tmp, {n_vec});')

            # Limit the loop predicate
            loop_pred = util.get_loop_predicate(sdfg, dfg, dst_node)
            callsite_stream.write(
                f'{loop_pred} = svand_z({loop_pred}, {loop_pred}, svwhilelt_b{in_conn.vtype.bytes * 8}(0ll, __cnt));')

            # Transfer to register
            callsite_stream.write(f'{var_name} = svld1({loop_pred}, __tmp);')

            callsite_stream.write('}')
            """
            return

        if isinstance(in_conn, dtypes.vector):
            # Copy from vector, so we can use svld

            if in_conn.type not in util.TYPE_TO_SVE:
                raise NotImplementedError(
                    f'Data type {in_conn.type} not supported')

            self.dispatcher.defined_vars.add(edge.dst_conn, dtypes.vector,
                                             in_conn.ctype)

            # Determine the stride of the load and use a gather if applicable
            stride = self.get_load_stride(sdfg, dfg, dst_node, edge.data)

            # First part of the declaration is `type name`
            load_lhs = '{} {}'.format(util.TYPE_TO_SVE[in_conn.type],
                                      edge.dst_conn)

            ptr_cast = ''
            if in_conn.type == np.int64:
                ptr_cast = '(int64_t*) '
            elif in_conn.type == np.uint64:
                ptr_cast = '(uint64_t*) '

            # Regular load and gather share the first arguments
            load_args = '{}, {}'.format(
                util.get_loop_predicate(sdfg, dfg, dst_node), ptr_cast +
                cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer))

            if stride == 1:
                callsite_stream.write('{} = svld1({});'.format(
                    load_lhs, load_args))
            else:
                callsite_stream.write(
                    '{} = svld1_gather_index({}, svindex_s{}(0, {}));'.format(
                        load_lhs, load_args,
                        util.get_base_type(in_conn).bytes * 8, sym2cpp(stride)))
        else:
            # Any other copy (e.g. pointer or scalar) is handled by the default CPU codegen
            self.cpu_codegen.copy_memory(sdfg, dfg, state_id, src_node,
                                         dst_node, edge, function_stream,
                                         callsite_stream)
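
The load side mirrors the store sketch in Example #11: a contiguous svld1 when the stride is 1, otherwise a gather built from svindex. With placeholder names and a stride of 4:

load_lhs = 'svint32_t __in'               # hypothetical declaration `type name`
load_args = '__pg_i, &A[0][i]'            # loop predicate and pointer expression
stride, elem_bits = 4, 32
print(f'{load_lhs} = svld1_gather_index({load_args}, svindex_s{elem_bits}(0, {stride}));')
# -> svint32_t __in = svld1_gather_index(__pg_i, &A[0][i], svindex_s32(0, 4));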
Example #13
File: codegen.py Project: am-ivanov/dace
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        entry_node = scope.source_nodes()[0]
        current_map = entry_node.map
        self.current_map = current_map

        if len(current_map.params) > 1:
            raise util.NotSupportedError('SVE map must be one dimensional')

        loop_types = list(set([util.get_base_type(sdfg.arrays[a].dtype) for a in sdfg.arrays]))

        # Edge case if no arrays are used
        loop_type = loop_types[0] if len(loop_types) > 0 else dace.int64

        ltype_size = loop_type.bytes

        long_type = copy.copy(dace.int64)
        long_type.ctype = 'int64_t'

        self.counter_type = {1: dace.int8, 2: dace.int16, 4: dace.int32, 8: long_type}[ltype_size]

        callsite_stream.write('{')
        self.dispatcher.defined_vars.enter_scope(scope)

        # Define all dynamic input connectors of the map entry
        state_dfg = sdfg.node(state_id)
        for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
            if e.data.data != e.dst_conn:
                callsite_stream.write(
                    self.cpu_codegen.memlet_definition(sdfg, e.data, False, e.dst_conn,
                                                       e.dst.in_connectors[e.dst_conn]), sdfg, state_id, entry_node)

        param = current_map.params[0]
        rng = current_map.range[0]
        begin, end, stride = (sym2cpp(r) for r in rng)

        # Generate the SVE loop header
        # The name of our loop predicate is always __pg_{param}
        self.dispatcher.defined_vars.add('__pg_' + param, DefinedType.Scalar, 'svbool_t')

        # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
        callsite_stream.write(f'{self.counter_type} {param} = {begin};')

        end_param = f'__{param}_to'
        callsite_stream.write(f'{self.counter_type} {end_param} = {end};')

        callsite_stream.write(f'svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

        # Test for the predicate
        callsite_stream.write(f'while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param})) {{')

        # Allocate scope related memory
        for node, _ in scope.all_nodes_recursive():
            if isinstance(node, nodes.Tasklet):
                # Create empty shared registers for outputs into other tasklets
                for edge in state_dfg.out_edges(node):
                    if isinstance(edge.dst, dace.nodes.Tasklet):
                        self.generate_out_register(sdfg, state_dfg, edge, callsite_stream, True)

        # Dispatch the subgraph generation
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Increase the counting variable (according to the number of processed elements)
        size_letter = {1: 'b', 2: 'h', 4: 'w', 8: 'd'}[ltype_size]
        callsite_stream.write(f'{param} += svcnt{size_letter}() * {stride};')

        # Then recompute the loop predicate
        callsite_stream.write(f'__pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

        callsite_stream.write('}')

        self.dispatcher.defined_vars.exit_scope(scope)
        callsite_stream.write('}')
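
Putting the header, predicate test, and counter increment together: a map over i spanning the inclusive range [0, N - 1] with stride 1 and 8-byte loop counters would produce a loop of roughly the following shape. The sketch below only prints that shape as a string; N and the parameter name are placeholders, and the __pg_<param> naming follows the code above.

param, begin, end, bits, stride = 'i', '0', 'N - 1', 64, 1
print(f'''int64_t {param} = {begin};
int64_t __{param}_to = {end};
svbool_t __pg_{param} = svwhilele_b{bits}({param}, __{param}_to);
while (svptest_any(svptrue_b{bits}(), __pg_{param})) {{
    /* ... dispatched subgraph ... */
    {param} += svcntd() * {stride};
    __pg_{param} = svwhilele_b{bits}({param}, __{param}_to);
}}''')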
Example #14
File: codegen.py Project: am-ivanov/dace
    def generate_writeback(self, sdfg: SDFG, state: SDFGState, map: nodes.Map,
                           edge: graph.MultiConnectorEdge[mm.Memlet], code: CodeIOStream):
        """
            Responsible for generating code for a writeback in a Tasklet, given the outgoing edge.
            This is mainly taking the temporary register and writing it back.
        """
        if edge.src_conn is None:
            return

        dst_node = state.memlet_path(edge)[-1].dst

        src_type = edge.src.out_connectors[edge.src_conn]
        src_name = edge.src_conn

        if isinstance(dst_node, nodes.Tasklet):
            ##################
            # Code->Code edges
            dst_type = edge.dst.in_connectors[edge.dst_conn]

            if (util.is_vector(src_type) and util.is_vector(dst_type)) or (util.is_scalar(src_type)
                                                                           and util.is_scalar(dst_type)):
                # Simply write back to shared register
                code.write(f'{edge.data.data} = {src_name};')
            elif util.is_scalar(src_type) and util.is_vector(dst_type):
                # Scalar broadcast to shared vector register
                code.write(f'{edge.data.data} = svdup_{util.TYPE_TO_SVE_SUFFIX[dst_type.type]}({src_name});')
            else:
                raise util.NotSupportedError('Unsupported Code->Code edge')
        elif isinstance(dst_node, nodes.AccessNode):
            ##################
            # Write to AccessNode
            desc = dst_node.desc(sdfg)
            if isinstance(desc, data.Array):
                ##################
                # Write into Array
                if util.is_pointer(src_type):
                    raise util.NotSupportedError('Unsupported writeback')
                elif util.is_vector(src_type):
                    ##################
                    # Scatter vector store into array

                    stride = edge.data.get_stride(sdfg, map)

                    # long long fix
                    ptr_cast = ''
                    if src_type.type == np.int64:
                        ptr_cast = '(int64_t*) '
                    elif src_type.type == np.uint64:
                        ptr_cast = '(uint64_t*) '

                    store_args = '{}, {}'.format(
                        util.get_loop_predicate(sdfg, state, edge.src),
                        ptr_cast + cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer, codegen=self.frame),
                    )

                    if stride == 1:
                        code.write(f'svst1({store_args}, {src_name});')
                    else:
                        code.write(
                            f'svst1_scatter_index({store_args}, svindex_s{util.get_base_type(src_type).bytes * 8}(0, {sym2cpp(stride)}), {src_name});'
                        )
                else:
                    ##################
                    # Scalar write into array
                    code.write(f'{cpp.cpp_array_expr(sdfg, edge.data, codegen=self.frame)} = {src_name};')
            elif isinstance(desc, data.Scalar):
                ##################
                # Write into Scalar
                if util.is_pointer(src_type):
                    raise util.NotSupportedError('Unsupported writeback')
                elif util.is_vector(src_type):
                    if util.is_vector(desc.dtype):
                        ##################
                        # Vector write into vector Scalar access node
                        code.write(f'{edge.data.data} = {src_name};')
                    else:
                        raise util.NotSupportedError('Unsupported writeback')
                else:
                    if util.is_vector(desc.dtype):
                        ##################
                        # Broadcast into scalar AccessNode
                        code.write(f'{edge.data.data} = svdup_{util.TYPE_TO_SVE_SUFFIX[src_type.type]}({src_name});')
                    else:
                        ##################
                        # Scalar write into scalar AccessNode
                        code.write(f'{edge.data.data} = {src_name};')

        else:
            raise util.NotSupportedError('Only writeback to Tasklets and AccessNodes is supported')
Example #15
File: codegen.py Project: am-ivanov/dace
    def generate_read(self, sdfg: SDFG, state: SDFGState, map: nodes.Map, edge: graph.MultiConnectorEdge[mm.Memlet],
                      code: CodeIOStream):
        """
            Responsible for generating code for reads into a Tasklet, given the ingoing edge.
        """
        if edge.dst_conn is None:
            return
        src_node = state.memlet_path(edge)[0].src
        dst_type = edge.dst.in_connectors[edge.dst_conn]
        dst_name = edge.dst_conn
        if isinstance(src_node, nodes.Tasklet):
            ##################
            # Code->Code edges
            src_type = edge.src.out_connectors[edge.src_conn]
            if util.is_vector(src_type) and util.is_vector(dst_type):
                # Directly read from shared vector register
                code.write(f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = {edge.data.data};')
            elif util.is_scalar(src_type) and util.is_scalar(dst_type):
                # Directly read from shared scalar register
                code.write(f'{dst_type} {dst_name} = {edge.data.data};')
            elif util.is_scalar(src_type) and util.is_vector(dst_type):
                # Scalar broadcast from shared scalar register
                code.write(
                    f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = svdup_{util.TYPE_TO_SVE_SUFFIX[dst_type.type]}({edge.data.data});'
                )
            else:
                raise util.NotSupportedError('Unsupported Code->Code edge')
        elif isinstance(src_node, nodes.AccessNode):
            ##################
            # Read from AccessNode
            desc = src_node.desc(sdfg)
            if isinstance(desc, data.Array):
                # Copy from array
                if util.is_pointer(dst_type):
                    ##################
                    # Pointer reference
                    code.write(
                        f'{dst_type} {dst_name} = {cpp.cpp_ptr_expr(sdfg, edge.data, None, codegen=self.frame)};')
                elif util.is_vector(dst_type):
                    ##################
                    # Vector load

                    stride = edge.data.get_stride(sdfg, map)

                    # First part of the declaration is `type name`
                    load_lhs = '{} {}'.format(util.TYPE_TO_SVE[dst_type.type], dst_name)

                    # long long issue casting
                    ptr_cast = ''
                    if dst_type.type == np.int64:
                        ptr_cast = '(int64_t*) '
                    elif dst_type.type == np.uint64:
                        ptr_cast = '(uint64_t*) '

                    # Regular load and gather share the first arguments
                    load_args = '{}, {}'.format(
                        util.get_loop_predicate(sdfg, state, edge.dst),
                        ptr_cast + cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer, codegen=self.frame))

                    if stride == 1:
                        code.write('{} = svld1({});'.format(load_lhs, load_args))
                    else:
                        code.write('{} = svld1_gather_index({}, svindex_s{}(0, {}));'.format(
                            load_lhs, load_args,
                            util.get_base_type(dst_type).bytes * 8, sym2cpp(stride)))
                else:
                    ##################
                    # Scalar read from array
                    code.write(f'{dst_type} {dst_name} = {cpp.cpp_array_expr(sdfg, edge.data, codegen=self.frame)};')
            elif isinstance(desc, data.Scalar):
                # Refer to shared variable
                src_type = desc.dtype
                if util.is_vector(src_type) and util.is_vector(dst_type):
                    # Directly read from shared vector register
                    code.write(f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = {edge.data.data};')
                elif util.is_scalar(src_type) and util.is_scalar(dst_type):
                    # Directly read from shared scalar register
                    code.write(f'{dst_type} {dst_name} = {edge.data.data};')
                elif util.is_scalar(src_type) and util.is_vector(dst_type):
                    # Scalar broadcast from shared scalar register
                    code.write(
                        f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = svdup_{util.TYPE_TO_SVE_SUFFIX[dst_type.type]}({edge.data.data});'
                    )
                else:
                    raise util.NotSupportedError('Unsupported Scalar->Code edge')
        else:
            raise util.NotSupportedError('Only copy from Tasklets and AccessNodes is supported')