Esempio n. 1
0
 def define_stream(dtype, vector_length, buffer_size, var_name, array_size,
                   function_stream, kernel_stream):
     """Emit the C++ declaration of a dace::FIFO stream (or stream array)
     into ``kernel_stream``."""
     size_str = cpp.sym2cpp(array_size)
     fifo_type = f"dace::FIFO<{dtype.ctype}, {vector_length}, {buffer_size}>"
     if size_str == "1":
         # Single stream: declare it and pass its name to the constructor.
         kernel_stream.write(f"{fifo_type} {var_name}(\"{var_name}\");")
     else:
         # Stream array: declare the array, then assign indexed names.
         kernel_stream.write(f"{fifo_type} {var_name}[{size_str}];\n")
         kernel_stream.write(
             f"dace::SetNames({var_name}, \"{var_name}\", {size_str});")
Esempio n. 2
0
    def define_stream(dtype, buffer_size, var_name, array_size, function_stream,
                      kernel_stream):
        """Emit the declaration of a FIFO stream (or stream array) and return
        its C type string.

        The return value is used for adding to defined_vars in fpga.py.
        """
        size_str = cpp.sym2cpp(array_size)
        ctype = (f"dace::FIFO<{dtype.base_type.ctype}, {dtype.veclen}, "
                 f"{buffer_size}>")
        if size_str == "1":
            # Single stream: name it via the constructor argument.
            kernel_stream.write(f"{ctype} {var_name}(\"{var_name}\");")
        else:
            # Stream array: declare, then assign indexed names at runtime.
            kernel_stream.write(f"{ctype} {var_name}[{size_str}];\n")
            kernel_stream.write(
                f"dace::SetNames({var_name}, \"{var_name}\", {size_str});")
        return ctype
Esempio n. 3
0
    def copy_memory(self, sdfg, dfg, state_id, src_node, dst_node, edge,
                    function_stream, callsite_stream):
        """Generate copy code for an edge whose source or destination access
        node has a ``materialize_func``.

        The materialization function itself is written to ``function_stream``;
        the call that performs (or prepares) the copy is written to
        ``callsite_stream``.
        """
        memlet = edge.data
        # Source side: data must be materialized before it can be read.
        if (isinstance(src_node, nodes.AccessNode)
                and (src_node.desc(sdfg).materialize_func is not None)):
            function_stream.write(src_node.desc(sdfg).materialize_func)

            # Prefer the destination connector name; otherwise fall back to
            # the destination node's descriptor.
            if edge.dst_conn is not None:
                arrayname = str(edge.dst_conn)
            else:
                arrayname = str(dst_node.desc)

            if isinstance(dst_node, nodes.Tasklet) or \
                    (dst_node.desc(sdfg).storage == dtypes.StorageType.Register):
                # Tasklet or register destination: a plain memlet definition
                # suffices.
                callsite_stream.write(
                    self.memlet_definition(sdfg,
                                           memlet,
                                           arrayname,
                                           direction="in"), sdfg, state_id,
                    [src_node, dst_node])
            else:
                # Otherwise call the runtime materialization helper over the
                # element range [min, min + num_elements) of the subset.
                callsite_stream.write("__dace_materialize(\"" + \
                                      sym2cpp(src_node) + "\", " + \
                                      sym2cpp(memlet.subset.min_element()[0]) +
                                      ", " + \
                                      sym2cpp(memlet.subset.min_element()[0] +
                                          memlet.subset.num_elements()) +
                                      ", " + sym2cpp(dst_node.data) + ");\n",
                                      sdfg, state_id, [src_node, dst_node])

        # Destination side: data must be serialized after it is written.
        if (isinstance(dst_node, nodes.AccessNode)
                and (dst_node.desc(sdfg).materialize_func is not None)):
            # This case is pretty complicated due to how the rest of the
            # codegen works: This is not the place to actually copy code. In
            # the place where data is ready to be written there will be a call
            # __foo.write(foo) where foo is the local_name of the memlet that
            # "causes" the write. But this function is actually called when
            # we should set up everything for this call to work.
            # The above mentioned code is generated by process_out_memlets

            function_stream.write(dst_node.desc(sdfg).materialize_func)
            if isinstance(src_node, nodes.Tasklet) or \
                    (src_node.desc(sdfg).storage == dtypes.StorageType.Register):
                callsite_stream.write(
                    self.memlet_definition(sdfg,
                                           memlet,
                                           edge.src_conn,
                                           direction="out"), sdfg, state_id,
                    [src_node, dst_node])
            else:
                # Serialize the written element range via the runtime helper.
                callsite_stream.write("__dace_serialize(\"" + \
                        sym2cpp(dst_node) + "\", " + \
                        sym2cpp(memlet.subset.min_element()[0]) +
                        ", " + \
                        sym2cpp(memlet.subset.min_element()[0] +
                            memlet.subset.num_elements()) +
                        ", " + sym2cpp(src_node.data) + ");\n",
                    sdfg, state_id, [src_node, dst_node])
Esempio n. 4
0
def replace_properties(node: Any, symrepl: Dict[symbolic.symbol,
                                                symbolic.SymbolicType],
                       name: str, new_name: str):
    """Replace occurrences of ``name`` with ``new_name`` in every
    replaceable property of ``node``.

    Handles symbolic, data, range/shape, code, and (for nested SDFGs)
    ``symbol_mapping`` dictionary properties. ``symrepl`` is the symbolic
    substitution dictionary applied to sympy-based values.
    """
    for propclass, propval in node.properties():
        if propval is None:
            continue
        pname = propclass.attr_name
        if isinstance(propclass, properties.SymbolicProperty):
            setattr(node, pname, propval.subs(symrepl))
        elif isinstance(propclass, properties.DataProperty):
            # Data properties are plain names; replace on exact match only.
            if propval == name:
                setattr(node, pname, new_name)
        elif isinstance(propclass,
                        (properties.RangeProperty, properties.ShapeProperty)):
            setattr(node, pname, _replsym(list(propval), symrepl))
        elif isinstance(propclass, properties.CodeProperty):
            # Don't replace variables that appear as an input or an output
            # connector, as this should shadow the outer declaration.
            if hasattr(node, 'in_connectors'):
                if name in node.in_connectors:
                    continue
            if hasattr(node, 'out_connectors'):
                if name in node.out_connectors:
                    continue
            if isinstance(propval.code, str):
                if str(name) != str(new_name):
                    lang = propval.language
                    newcode = propval.code
                    # Skip code that does not mention the name as a
                    # standalone token.
                    # NOTE(review): the pattern requires a non-word char on
                    # both sides, so it misses a match at the very start or
                    # end of the code string -- confirm this is intended.
                    if not re.findall(r'[^\w]%s[^\w]' % name, newcode):
                        continue

                    if lang is dtypes.Language.CPP:  # Replace in C++ code
                        # Avoid import loop
                        from dace.codegen.targets.cpp import sym2cpp

                        # Use local variables and shadowing to replace
                        replacement = 'auto %s = %s;\n' % (name,
                                                           sym2cpp(new_name))
                        propval.code = replacement + newcode
                    else:
                        warnings.warn(
                            'Replacement of %s with %s was not made '
                            'for string tasklet code of language %s' %
                            (name, new_name, lang))
            elif propval.code is not None:
                # Python AST code: replace identifiers statement by statement.
                for stmt in propval.code:
                    ASTFindReplace({
                        name: symbolic.symstr(new_name)
                    }).visit(stmt)
        elif (isinstance(propclass, properties.DictProperty)
              and pname == 'symbol_mapping'):
            # Symbol mappings for nested SDFGs
            for symname, sym_mapping in propval.items():
                try:
                    propval[symname] = symbolic.pystr_to_symbolic(
                        str(sym_mapping)).subs(symrepl)
                except AttributeError:  # If the symbolified value has no subs
                    pass
Esempio n. 5
0
    def on_node_end(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode,
                    outer_stream: CodeIOStream, inner_stream: CodeIOStream,
                    global_stream: CodeIOStream):
        """Emit host-side code that saves the contents of ``node``'s data via
        the state serializer.

        Runs only in host code (device-level GPU/FPGA contexts are skipped).
        GPU-global arrays are first staged back to the host via
        ``_generate_copy_to_host``.
        """
        from dace.codegen.dispatcher import DefinedType  # Avoid import loop

        if is_devicelevel_gpu(sdfg, state, node) or is_devicelevel_fpga(
                sdfg, state, node):
            # Only run on host code
            return

        desc = node.desc(sdfg)

        # Obtain a pointer for arrays and scalars
        ptrname = cpp.ptr(node.data, desc, sdfg, self.codegen)
        defined_type, _ = self.codegen.dispatcher.defined_vars.get(ptrname)
        if defined_type == DefinedType.Scalar:
            # Scalars are saved by address.
            ptrname = '&' + ptrname

        # Create UUID
        state_id = sdfg.node_id(state)
        node_id = state.node_id(node)
        uuid = f'{sdfg.sdfg_id}_{state_id}_{node_id}'

        # Get optional pre/postamble for instrumenting device data
        preamble, postamble = '', ''
        if desc.storage == dtypes.StorageType.GPU_Global:
            self._setup_gpu_runtime(sdfg, global_stream)
            preamble, postamble, ptrname = self._generate_copy_to_host(
                node, desc, ptrname)

        # Encode runtime shape and strides
        shape = ', '.join(cpp.sym2cpp(s) for s in desc.shape)
        strides = ', '.join(cpp.sym2cpp(s) for s in desc.strides)

        # Write code
        inner_stream.write(preamble, sdfg, state_id, node_id)
        inner_stream.write(
            f'__state->serializer->save({ptrname}, {cpp.sym2cpp(desc.total_size - desc.start_offset)}, '
            f'"{node.data}", "{uuid}", {shape}, {strides});\n', sdfg, state_id,
            node_id)
        inner_stream.write(postamble, sdfg, state_id, node_id)
Esempio n. 6
0
 def define_local_array(dtype, vector_length, var_name, array_size, storage,
                        shape, function_stream, kernel_stream, sdfg,
                        state_id, node):
     """Declare an on-chip local array and emit its HLS partitioning
     pragma, if applicable."""
     size_str = cpp.sym2cpp(array_size)
     kernel_stream.write(
         f"dace::vec<{dtype.ctype}, {vector_length}> {var_name}[{size_str}];\n")
     if storage == dace.dtypes.StorageType.FPGA_Registers:
         # Register storage: fully partition the array.
         kernel_stream.write(
             f"#pragma HLS ARRAY_PARTITION variable={var_name} complete\n")
     elif len(shape) > 1:
         # Multi-dimensional local memory: block-partition with the
         # second-to-last dimension as the factor.
         kernel_stream.write(
             f"#pragma HLS ARRAY_PARTITION variable={var_name} "
             f"block factor={shape[-2]}\n")
Esempio n. 7
0
    def define_stream(dtype, buffer_size, var_name, array_size,
                      function_stream, kernel_stream):
        """
           Defines a stream.
           :return: a tuple containing the type of the created variable, and
               a boolean indicating whether this is a global variable or not.
               In Xilinx, streams are defined as local variables, hence the
               second element is always False. The return value is used for
               adding to defined_vars in fpga.py.
           """
        size_str = cpp.sym2cpp(array_size)
        ctype = (f"dace::FIFO<{dtype.base_type.ctype}, {dtype.veclen}, "
                 f"{buffer_size}>")
        if size_str == "1":
            # Single stream: named via its constructor.
            kernel_stream.write(f"{ctype} {var_name}(\"{var_name}\");")
        else:
            # Stream array: declare, then assign indexed names at runtime.
            kernel_stream.write(f"{ctype} {var_name}[{size_str}];\n")
            kernel_stream.write(
                f"dace::SetNames({var_name}, \"{var_name}\", {size_str});")

        return ctype, False
Esempio n. 8
0
def _cases_from_branches(
    edges: List[Edge[InterstateEdge]],
    cblocks: Dict[Edge[InterstateEdge], GeneralBlock],
) -> Tuple[str, Dict[str, GeneralBlock]]:
    """ 
    If the input list of edges correspond to a switch/case scope (with all
    conditions being "x == y" for a unique symbolic x and integers y),
    returns the switch/case scope parameters.
    :param edges: List of inter-state edges.
    :param cblocks: Mapping from inter-state edge to its control flow block.
    :return: Tuple of (case variable C++ expression, mapping from case to 
             control flow block). If not a valid switch/case scope, 
             returns None.
    """
    cond = edges[0].data.condition_sympy()
    if not isinstance(cond, sp.Basic):
        return None
    # Wildcards: `a` matches the case variable, `b` an integer literal.
    a = sp.Wild('a')
    b = sp.Wild('b', properties=[lambda k: k.is_Integer])
    # Try "symbol == integer" first.
    m = cond.match(sp.Eq(a, b))
    if m:
        # Obtain original code for variable
        call_or_compare = edges[0].data.condition.code[0].value
        if isinstance(call_or_compare, ast.Call):
            astvar = call_or_compare.args[0]
        else:  # Binary comparison
            astvar = call_or_compare.left
    else:
        # Try integer == symbol
        m = cond.match(sp.Eq(b, a))
        if m:
            call_or_compare = edges[0].data.condition.code[0].value
            if isinstance(call_or_compare, ast.Call):
                astvar = call_or_compare.args[1]
            else:  # Binary comparison
                astvar = call_or_compare.right
        else:
            return None

    # Get C++ expression from AST
    switchvar = cppunparse.pyexpr2cpp(astvar)

    # Check that all edges match criteria
    result = {}
    for e in edges:
        # Every edge must compare the same variable against some integer,
        # in either operand order.
        ematch = e.data.condition_sympy().match(sp.Eq(m[a], b))
        if not ematch:
            ematch = e.data.condition_sympy().match(sp.Eq(b, m[a]))
            if not ematch:
                return None
        # Create mapping to codeblocks
        result[cpp.sym2cpp(ematch[b])] = cblocks[e]

    return switchvar, result
Esempio n. 9
0
 def make_kernel_argument(data: dt.Data,
                          var_name: str,
                          subset_info: Union[int, subsets.Subset],
                          sdfg: SDFG,
                          is_output: bool,
                          with_vectorization: bool,
                          interface_id: Union[int, List[int]] = None):
     """Return the C++ signature fragment for one kernel argument.

     Arrays become raw pointers (vectorized or base-typed), streams become
     FIFO references, and anything else defers to ``data.as_arg``.
     """
     if isinstance(data, dt.Array):
         # Arrays are passed as (possibly bank-qualified) raw pointers.
         ptr_name = fpga.fpga_ptr(var_name, data, sdfg, subset_info,
                                  is_output, None, None, True, interface_id)
         elem_type = data.dtype if with_vectorization else data.dtype.base_type
         return f"{elem_type.ctype} *{ptr_name}"
     if isinstance(data, dt.Stream):
         # Streams are passed by reference to the FIFO object.
         fifo_type = (f"dace::FIFO<{data.dtype.base_type.ctype}, "
                      f"{cpp.sym2cpp(data.dtype.veclen)}, "
                      f"{cpp.sym2cpp(data.buffer_size)}>")
         return f"{fifo_type} &{var_name}"
     return data.as_arg(with_types=True, name=var_name)
Esempio n. 10
0
 def define_local_array(self, var_name, desc, array_size, function_stream,
                        kernel_stream, sdfg, state_id, node):
     """Declare a local FPGA array, emit its partitioning pragma, and
     register it as a defined pointer variable.

     :raises ValueError: if ``desc.storage`` is neither FPGA_Registers nor
         FPGA_Local.
     """
     ctype = desc.dtype.ctype
     size_str = cpp.sym2cpp(array_size)
     kernel_stream.write(f"{ctype} {var_name}[{size_str}];\n")
     storage = desc.storage
     if storage == dace.dtypes.StorageType.FPGA_Registers:
         # Register storage: fully partition the array.
         kernel_stream.write(
             f"#pragma HLS ARRAY_PARTITION variable={var_name} complete\n")
     elif storage != dace.dtypes.StorageType.FPGA_Local:
         raise ValueError(f"Unsupported storage type: {storage.name}")
     self._dispatcher.defined_vars.add(var_name, DefinedType.Pointer,
                                       f'{ctype} *')
Esempio n. 11
0
def generate_dummy(sdfg) -> str:
    """ Generates a C program calling this SDFG. Since we do not
        know the purpose/semantics of the program, we allocate
        the right types and guess values for scalars.

        :param sdfg: SDFG to generate the caller program for.
        :return: The complete C program as a string.
    """
    includes = "#include <stdlib.h>\n"
    includes += "#include \"" + sdfg.name + ".h\"\n\n"
    header = "int main(int argc, char** argv) {\n"
    allocations = ""
    deallocations = ""
    footer = "  return 0;\n}\n"

    al = sdfg.arglist()

    # first find all scalars and set them to 42 (arbitrary value, since we
    # cannot know meaningful inputs without program semantics)
    for argname, arg in al.items():
        if isinstance(arg, data.Scalar):
            allocations += "  " + str(arg.as_arg(name=argname,
                                                 with_types=True)) + " = 42;\n"

    # allocate the array args using calloc (and free them at the end)
    for argname, arg in al.items():
        if isinstance(arg, data.Array):
            # Total element count: product of all shape dimensions.
            dims_mul = cpp.sym2cpp(
                functools.reduce(lambda a, b: a * b, arg.shape, 1))
            basetype = str(arg.dtype)
            allocations += "  " + str(arg.as_arg(name=argname, with_types=True)) + \
                           " = (" + basetype + "*) calloc(" + dims_mul + ", sizeof("+ basetype +")" + ");\n"
            deallocations += "  free(" + argname + ");\n"

    # Initialize, run, and tear down the SDFG with the same argument list.
    sdfg_call = '''
  __dace_init_{name}({params});
  __program_{name}({params});
  __dace_exit_{name}({params});\n\n'''.format(name=sdfg.name,
                                              params=sdfg.signature(
                                                  with_types=False,
                                                  for_call=True))

    # Assemble the program in source order.
    return (includes + header + allocations + sdfg_call + deallocations +
            footer)
Esempio n. 12
0
    def memlet_definition(self, sdfg, memlet, local_name, direction="in"):
        """Return C++ code that defines a local view variable for ``memlet``.

        :param local_name: Name of the local variable to define.
        :param direction: "in" initializes the local value from the view;
            any other value only declares it.
        """
        # Streams are simply aliased by reference.
        if isinstance(memlet.data, data.Stream):
            return 'auto& %s = %s;\n' % (local_name, memlet.data)

        # Define the view object (__<name>) over the accessed subset.
        result = ('auto __%s = ' % local_name +
                  self.memlet_view_ctor(sdfg, memlet, direction) + ';\n')

        # Allocate variable type
        memlet_type = '    dace::vec<%s, %s>' % (
            sdfg.arrays[memlet.data].dtype.ctype, sym2cpp(memlet.veclen))
        # Scalar (zero-dimensional) accesses with a known access count also
        # get a value-typed local variable.
        if memlet.subset.data_dims() == 0 and memlet.num_accesses >= 0:
            result += memlet_type + ' ' + local_name
            if direction == "in":
                result += ' = __%s;\n' % local_name
            else:
                result += ';\n'

        return result
Esempio n. 13
0
    def _generate_copy_to_device(self, node: nodes.AccessNode, desc: dt.Array,
                                 ptr: str) -> Tuple[str, str, str]:
        """ Copies restored data to device and returns (preamble, postamble, name of new host pointer). """
        host_ptr = f'__dinstr_{node.data}'
        num_elements = desc.total_size - desc.start_offset
        csize = cpp.sym2cpp(num_elements)
        host_desc = dt.Array(desc.dtype, [num_elements])

        # Preamble: open a scope and allocate a host-side staging buffer
        # (the scope is closed by the postamble).
        preamble = f'''
        {{
        {host_desc.as_arg(name=host_ptr)} = new {desc.dtype.ctype}[{csize}];
        '''

        # Postamble: synchronous copy to the device, then free the buffer.
        postamble = f'''
        {self.backend}Memcpy({ptr}, {host_ptr}, sizeof({desc.dtype.ctype}) * ({csize}), {self.backend}MemcpyHostToDevice);
        delete[] {host_ptr};
        }}
        '''

        return preamble, postamble, host_ptr
Esempio n. 14
0
def generate_dummy(sdfg: SDFG, frame: framecode.DaCeCodeGenerator) -> str:
    """ Generates a C program calling this SDFG. Since we do not
        know the purpose/semantics of the program, we allocate
        the right types and and guess values for scalars.
    """
    al = frame.arglist
    init_params = sdfg.signature(with_types=False,
                                 for_call=True,
                                 with_arrays=False,
                                 arglist=frame.arglist_scalars_only)
    params = sdfg.signature(with_types=False,
                            for_call=True,
                            arglist=frame.arglist)
    if len(params) > 0:
        params = ', ' + params

    allocations = ''
    deallocations = ''

    # first find all scalars and set them to 42
    for argname, arg in al.items():
        if isinstance(arg, data.Scalar):
            allocations += ("    " +
                            str(arg.as_arg(name=argname, with_types=True)) +
                            " = 42;\n")

    # allocate the array args using calloc
    for argname, arg in al.items():
        if isinstance(arg, data.Array):
            dims_mul = cpp.sym2cpp(
                functools.reduce(lambda a, b: a * b, arg.shape, 1))
            basetype = str(arg.dtype)
            allocations += ("    " +
                            str(arg.as_arg(name=argname, with_types=True)) +
                            " = (" + basetype + "*) calloc(" + dims_mul +
                            ", sizeof(" + basetype + ")" + ");\n")
            deallocations += "    free(" + argname + ");\n"

    return f'''#include <cstdlib>
Esempio n. 15
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView,
                       state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """Generate code for a map scope as nested C++ for-loops, with the
        innermost loop emitted as an SVE predicated do-while loop.
        """
        entry_node = scope.source_nodes()[0]

        # NOTE(review): picks an arbitrary array dtype (set order is not
        # deterministic) to size the loop counter -- assumes all arrays in
        # the SDFG share a dtype; confirm.
        loop_type = list(set([sdfg.arrays[a].dtype for a in sdfg.arrays]))[0]
        ltype_size = loop_type.bytes

        long_type = copy.copy(dace.int64)
        long_type.ctype = 'int64_t'

        # Counter type matches the byte width of the element type.
        self.counter_type = {
            1: dace.int8,
            2: dace.int16,
            4: dace.int32,
            8: long_type
        }[ltype_size]

        callsite_stream.write('{')

        # Define all input connectors of the map entry
        state_dfg = sdfg.node(state_id)
        for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
            if e.data.data != e.dst_conn:
                callsite_stream.write(
                    self.cpu_codegen.memlet_definition(
                        sdfg, e.data, False, e.dst_conn,
                        e.dst.in_connectors[e.dst_conn]), sdfg, state_id,
                    entry_node)

        # We only create an SVE do-while in the innermost loop
        for param, rng in zip(entry_node.map.params, entry_node.map.range):
            begin, end, stride = (sym2cpp(r) for r in rng)

            self.dispatcher.defined_vars.enter_scope(sdfg)

            # Check whether we are in the innermost loop
            if param != entry_node.map.params[-1]:
                # Default C++ for-loop
                callsite_stream.write(
                    f'for(auto {param} = {begin}; {param} <= {end}; {param} += {stride}) {{'
                )
            else:
                # Generate the SVE loop header

                # The name of our loop predicate is always __pg_{param}
                self.dispatcher.defined_vars.add('__pg_' + param,
                                                 DefinedType.Scalar, 'svbool_t')

                # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
                callsite_stream.write(
                    f'''{self.counter_type} {param} = {begin};
                    svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
                    do {{''', sdfg, state_id, entry_node)

        # Dispatch the subgraph generation
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Close the loops from above (in reverse)
        for param, rng in zip(reversed(entry_node.map.params),
                              reversed(entry_node.map.range)):
            # The innermost loop is SVE and needs a special while-footer, otherwise we just add the closing bracket
            if param != entry_node.map.params[-1]:
                # Close the default C++ for-loop
                callsite_stream.write('}')
            else:
                # Generate the SVE loop footer

                _, end, stride = (sym2cpp(r) for r in rng)

                # Increase the counting variable (according to the number of processed elements)
                # Then recompute the loop predicate and test for it
                callsite_stream.write(
                    f'''{param} += svcntp_b{ltype_size * 8}(__pg_{param}, __pg_{param}) * {stride};
                    __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
                    }} while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param}));''',
                    sdfg, state_id, entry_node)

            # One exit per enter_scope in the first loop.
            self.dispatcher.defined_vars.exit_scope(sdfg)

        callsite_stream.write('}')
Esempio n. 16
0
    def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                       callsite_stream: CodeIOStream):
        """Generate code for a one-dimensional SVE map scope as a predicated
        while-loop.

        :raises util.NotSupportedError: if the map has more than one
            dimension.
        """
        entry_node = scope.source_nodes()[0]
        current_map = entry_node.map
        self.current_map = current_map

        if len(current_map.params) > 1:
            raise util.NotSupportedError('SVE map must be one dimensional')

        loop_types = list(set([util.get_base_type(sdfg.arrays[a].dtype) for a in sdfg.arrays]))

        # Edge case if no arrays are used
        loop_type = loop_types[0] if len(loop_types) > 0 else dace.int64

        ltype_size = loop_type.bytes

        long_type = copy.copy(dace.int64)
        long_type.ctype = 'int64_t'

        # Counter type matches the byte width of the element type.
        self.counter_type = {1: dace.int8, 2: dace.int16, 4: dace.int32, 8: long_type}[ltype_size]

        callsite_stream.write('{')
        self.dispatcher.defined_vars.enter_scope(scope)

        # Define all dynamic input connectors of the map entry
        state_dfg = sdfg.node(state_id)
        for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
            if e.data.data != e.dst_conn:
                callsite_stream.write(
                    self.cpu_codegen.memlet_definition(sdfg, e.data, False, e.dst_conn,
                                                       e.dst.in_connectors[e.dst_conn]), sdfg, state_id, entry_node)

        param = current_map.params[0]
        rng = current_map.range[0]
        begin, end, stride = (sym2cpp(r) for r in rng)

        # Generate the SVE loop header
        # The name of our loop predicate is always __pg_{param}
        self.dispatcher.defined_vars.add('__pg_' + param, DefinedType.Scalar, 'svbool_t')

        # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
        callsite_stream.write(f'{self.counter_type} {param} = {begin};')

        # Materialize the loop end once so it is not re-evaluated per
        # iteration.
        end_param = f'__{param}_to'
        callsite_stream.write(f'{self.counter_type} {end_param} = {end};')

        callsite_stream.write(f'svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

        # Test for the predicate
        callsite_stream.write(f'while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param})) {{')

        # Allocate scope related memory
        for node, _ in scope.all_nodes_recursive():
            if isinstance(node, nodes.Tasklet):
                # Create empty shared registers for outputs into other tasklets
                for edge in state_dfg.out_edges(node):
                    if isinstance(edge.dst, dace.nodes.Tasklet):
                        self.generate_out_register(sdfg, state_dfg, edge, callsite_stream, True)

        # Dispatch the subgraph generation
        self.dispatcher.dispatch_subgraph(sdfg,
                                          scope,
                                          state_id,
                                          function_stream,
                                          callsite_stream,
                                          skip_entry_node=True,
                                          skip_exit_node=True)

        # Increase the counting variable (according to the number of processed elements)
        size_letter = {1: 'b', 2: 'h', 4: 'w', 8: 'd'}[ltype_size]
        callsite_stream.write(f'{param} += svcnt{size_letter}() * {stride};')

        # Then recompute the loop predicate
        callsite_stream.write(f'__pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

        callsite_stream.write('}')

        self.dispatcher.defined_vars.exit_scope(scope)
        callsite_stream.write('}')
Esempio n. 17
0
    def expansion(node: 'Reduce', state: SDFGState, sdfg: SDFG):
        """Expand a Reduce node into a C++ tasklet that performs the
        reduction with OpenMP ``reduction`` clauses.

        :return: The generated tasklet node (connectors renamed to
            ``_in``/``_out``).
        :raises ValueError: if the reduction type has no OpenMP equivalent.
        """
        node.validate(sdfg, state)
        inedge: graph.MultiConnectorEdge = state.in_edges(node)[0]
        outedge: graph.MultiConnectorEdge = state.out_edges(node)[0]
        input_dims = len(inedge.data.subset)
        output_dims = len(outedge.data.subset)
        input_data = sdfg.arrays[inedge.data.data]
        output_data = sdfg.arrays[outedge.data.data]

        # Get reduction type for OpenMP
        redtype = detect_reduction_type(node.wcr, openmp=True)
        if redtype not in ExpandReduceOpenMP._REDUCTION_TYPE_TO_OPENMP:
            raise ValueError('Reduction type not supported for "%s"' %
                             node.wcr)
        omptype, expr = ExpandReduceOpenMP._REDUCTION_TYPE_TO_OPENMP[redtype]

        # Standardize axes
        axes = node.axes if node.axes else [i for i in range(input_dims)]

        # Outer (non-reduced) loops are needed when not all axes are reduced.
        outer_loops = len(axes) != input_dims

        # Create OpenMP clause
        if outer_loops:
            code = '#pragma omp parallel for collapse({cdim})\n'.format(
                cdim=output_dims)
        else:
            code = ''

        from dace.codegen.targets.cpp import sym2cpp

        # Output loops
        out_offset = []
        if outer_loops:
            for i, sz in enumerate(outedge.data.subset.size()):
                code += 'for (int _o{i} = 0; _o{i} < {sz}; ++_o{i}) {{\n'.format(
                    i=i, sz=sym2cpp(sz))
                out_offset.append('_o%d * %s' %
                                  (i, sym2cpp(output_data.strides[i])))
        else:
            out_offset.append('0')

        outexpr = '_out[%s]' % ' + '.join(out_offset)

        # Write identity value first
        if node.identity is not None:
            code += '%s = %s;\n' % (outexpr, node.identity)

        # Reduction OpenMP clause
        code += '#pragma omp parallel for collapse({cdim}) ' \
          'reduction({rtype}: {oexpr})\n'.format(cdim=len(axes), rtype=omptype,
            oexpr=outexpr)

        # Reduction loops
        for i, axis in enumerate(sorted(axes)):
            sz = sym2cpp(inedge.data.subset.size()[axis])
            code += 'for (int _i{i} = 0; _i{i} < {sz}; ++_i{i}) {{\n'.format(
                i=i, sz=sz)

        # Prepare input offset expression
        # (reduced axes use _i counters, kept axes use _o counters)
        in_offset = []
        ictr, octr = 0, 0
        for i in range(input_dims):
            if i in axes:
                result = '_i%d' % ictr
                ictr += 1
            else:
                result = '_o%d' % octr
                octr += 1
            in_offset.append('%s * %s' %
                             (result, sym2cpp(input_data.strides[i])))
        in_offset = ' + '.join(in_offset)

        # Reduction expression
        code += expr.format(i='_in[%s]' % in_offset, o=outexpr)
        code += '\n'

        # Closing braces
        code += '}\n' * len(axes)
        if outer_loops:
            code += '}\n' * output_dims

        # Make tasklet
        tnode = dace.nodes.Tasklet('reduce',
                                   {'_in': dace.pointer(input_data.dtype)},
                                   {'_out': dace.pointer(output_data.dtype)},
                                   code,
                                   language=dace.Language.CPP)

        # Rename outer connectors and add to node
        inedge._dst_conn = '_in'
        outedge._src_conn = '_out'
        node.add_in_connector('_in')
        node.add_out_connector('_out')

        return tnode
Esempio n. 18
0
    def generate_module(self, sdfg, state, kernel_name, name, subgraph,
                        parameters, module_stream, entry_stream, host_stream,
                        instrumentation_stream):
        """Generates a module that will run as a dataflow function in the FPGA
           kernel.

           Emits the module's C++ function definition into ``module_stream``
           and the call to it (wrapped in ``HLSLIB_DATAFLOW_FUNCTION``) into
           ``entry_stream``. If the subgraph contains a SystemVerilog tasklet,
           code generation is delegated to the RTL backend instead, and only
           stream-connection bookkeeping plus the host-side kernel launch are
           emitted here.

           :param sdfg: The SDFG being generated.
           :param state: The state containing ``subgraph``.
           :param kernel_name: Name of the enclosing kernel (not used in this
                               method's visible body).
           :param name: Base name from which the module function name is
                        derived.
           :param subgraph: The subgraph of ``state`` this module implements.
           :param parameters: Iterable of ``(is_output, name, descriptor,
                              interface_ids)`` tuples describing arguments.
           :param module_stream: Stream receiving the module definition.
           :param entry_stream: Stream receiving the call site in the
                                top-level kernel function.
           :param host_stream: Stream receiving host-side launch code (RTL
                               path only).
           :param instrumentation_stream: Stream receiving instrumentation
                                          code when FPGA instrumentation is
                                          enabled.
        """

        state_id = sdfg.node_id(state)

        # Build two argument lists: the names used at the call site
        # (kernel_args_call) and the typed signature of the module function
        # (kernel_args_module).
        kernel_args_call = []
        kernel_args_module = []
        for is_output, pname, p, interface_ids in parameters:
            if isinstance(p, dt.Array):
                # Arrays may span multiple memory banks; emit one argument per
                # (bank, interface) pair.
                for bank, interface_id in fpga.iterate_multibank_interface_ids(
                        p, interface_ids):
                    arr_name = fpga.fpga_ptr(pname,
                                             p,
                                             sdfg,
                                             bank,
                                             is_output,
                                             is_array_interface=True)
                    # Add interface ID to called module, but not to the module
                    # arguments
                    argname = fpga.fpga_ptr(pname,
                                            p,
                                            sdfg,
                                            bank,
                                            is_output,
                                            is_array_interface=True,
                                            interface_id=interface_id)

                    kernel_args_call.append(argname)
                    dtype = p.dtype
                    # Inputs are passed as const pointers.
                    kernel_args_module.append("{} {}*{}".format(
                        dtype.ctype, "const " if not is_output else "",
                        arr_name))
            else:
                if isinstance(p, dt.Stream):
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    # Streams map to hlslib FIFO objects: arrays of streams
                    # are passed as C arrays, single streams by reference.
                    if p.is_stream_array():
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> {}[{}]".format(
                                p.dtype.base_type.ctype, cpp.sym2cpp(p.veclen),
                                cpp.sym2cpp(p.buffer_size), pname,
                                p.size_string()))
                    else:
                        kernel_args_module.append(
                            "dace::FIFO<{}, {}, {}> &{}".format(
                                p.dtype.base_type.ctype, cpp.sym2cpp(p.veclen),
                                cpp.sym2cpp(p.buffer_size), pname))
                else:
                    # Scalars and other descriptors know how to format
                    # themselves as arguments.
                    kernel_args_call.append(
                        p.as_arg(with_types=False, name=pname))
                    kernel_args_module.append(
                        p.as_arg(with_types=True, name=pname))

        # Check if we are generating an RTL module, in which case only the
        # accesses to the streams should be handled
        rtl_tasklet = None
        for n in subgraph.nodes():
            if (isinstance(n, dace.nodes.Tasklet)
                    and n.language == dace.dtypes.Language.SystemVerilog):
                rtl_tasklet = n
                break
        if rtl_tasklet:
            # Emit call and prototype only as comments; the RTL codegen
            # produces the actual implementation.
            entry_stream.write(
                f'// [RTL] HLSLIB_DATAFLOW_FUNCTION({name}, {", ".join(kernel_args_call)});'
            )
            module_stream.write(
                f'// [RTL] void {name}({", ".join(kernel_args_module)});\n\n')

            # _1 in names are due to vitis
            # Record, per SDFG stream, which AXI port of the generated RTL top
            # module it connects to: index 1 is the consumer (s_axis) side...
            for node in subgraph.source_nodes():
                if isinstance(sdfg.arrays[node.data], dt.Stream):
                    if node.data not in self._stream_connections:
                        self._stream_connections[node.data] = [None, None]
                    for edge in state.out_edges(node):
                        rtl_name = "{}_{}_{}_{}".format(
                            edge.dst, sdfg.sdfg_id, sdfg.node_id(state),
                            state.node_id(edge.dst))
                        self._stream_connections[
                            node.data][1] = '{}_top_1.s_axis_{}'.format(
                                rtl_name, edge.dst_conn)

            # ... and index 0 is the producer (m_axis) side.
            for node in subgraph.sink_nodes():
                if isinstance(sdfg.arrays[node.data], dt.Stream):
                    if node.data not in self._stream_connections:
                        self._stream_connections[node.data] = [None, None]
                    for edge in state.in_edges(node):
                        rtl_name = "{}_{}_{}_{}".format(
                            edge.src, sdfg.sdfg_id, sdfg.node_id(state),
                            state.node_id(edge.src))
                        self._stream_connections[
                            node.data][0] = '{}_top_1.m_axis_{}'.format(
                                rtl_name, edge.src_conn)

            # Make the dispatcher trigger generation of the RTL module, but
            # ignore the generated code, as the RTL codegen will generate the
            # appropriate files.
            ignore_stream = CodeIOStream()
            self._dispatcher.dispatch_subgraph(sdfg,
                                               subgraph,
                                               state_id,
                                               ignore_stream,
                                               ignore_stream,
                                               skip_entry_node=False)

            # Launch the kernel from the host code
            rtl_name = self.rtl_tasklet_name(rtl_tasklet, state, sdfg)
            host_stream.write(
                f"  auto kernel_{rtl_name} = program.MakeKernel(\"{rtl_name}_top\"{', '.join([''] + [name for _, name, p, _ in parameters if not isinstance(p, dt.Stream)])}).ExecuteTaskFork();",
                sdfg, state_id, rtl_tasklet)
            if state.instrument == dtypes.InstrumentationType.FPGA:
                self.instrument_opencl_kernel(rtl_name, state_id, sdfg.sdfg_id,
                                              instrumentation_stream)

            return

        # create a unique module name to prevent name clashes
        module_function_name = f"module_{name}_{sdfg.sdfg_id}"

        # Unrolling processing elements: if the first scope of the subgraph
        # is an unrolled map, generate a processing element for each iteration
        scope_children = subgraph.scope_children()
        top_scopes = [
            n for n in scope_children[None]
            if isinstance(n, dace.sdfg.nodes.EntryNode)
        ]
        unrolled_loops = 0
        if len(top_scopes) == 1:
            scope = top_scopes[0]
            if scope.unroll:
                self._unrolled_pes.add(scope.map)
                # Pass the map parameters through so each processing element
                # knows its own index. BUGFIX: previously this was
                # `kernel_args_call += ", ".join(scope.map.params)`, which
                # extends the list character-by-character (including ',' and
                # ' '), corrupting the call argument list for any
                # multi-character or multiple parameters. Extend with the
                # parameter list itself instead.
                kernel_args_call += scope.map.params
                kernel_args_module += ["int " + p for p in scope.params]
                for p, r in zip(scope.map.params, scope.map.range):
                    if len(r) > 3:
                        raise cgx.CodegenError("Strided unroll not supported")
                    # Open one unrolled loop per parameter; closed below after
                    # the dataflow call.
                    entry_stream.write(
                        "for (size_t {param} = {begin}; {param} < {end}; "
                        "{param} += {increment}) {{\n#pragma HLS UNROLL".
                        format(param=p,
                               begin=r[0],
                               end=r[1] + 1,
                               increment=r[2]))
                    unrolled_loops += 1

        # Generate caller code in top-level function
        entry_stream.write(
            "HLSLIB_DATAFLOW_FUNCTION({}, {});".format(
                module_function_name, ", ".join(kernel_args_call)), sdfg,
            state_id)

        # Close the unrolled PE loops opened above.
        for _ in range(unrolled_loops):
            entry_stream.write("}")

        # ----------------------------------------------------------------------
        # Generate kernel code
        # ----------------------------------------------------------------------

        self._dispatcher.defined_vars.enter_scope(subgraph)

        module_body_stream = CodeIOStream()

        module_body_stream.write(
            "void {}({}) {{".format(module_function_name,
                                    ", ".join(kernel_args_module)), sdfg,
            state_id)

        # Register the array interface as a naked pointer for use inside the
        # FPGA kernel
        interfaces_added = set()
        for is_output, argname, arg, interface_id in parameters:
            for bank, _ in fpga.iterate_multibank_interface_ids(
                    arg, interface_id):
                if (not (isinstance(arg, dt.Array) and arg.storage
                         == dace.dtypes.StorageType.FPGA_Global)):
                    continue
                ctype = dtypes.pointer(arg.dtype).ctype
                ptr_name = fpga.fpga_ptr(argname,
                                         arg,
                                         sdfg,
                                         bank,
                                         is_output,
                                         None,
                                         is_array_interface=True)
                if not is_output:
                    ctype = f"const {ctype}"
                self._dispatcher.defined_vars.add(ptr_name,
                                                  DefinedType.Pointer, ctype)
                # Register the interface name itself only once per argument.
                if argname in interfaces_added:
                    continue
                interfaces_added.add(argname)
                self._dispatcher.defined_vars.add(argname,
                                                  DefinedType.ArrayInterface,
                                                  ctype,
                                                  allow_shadowing=True)
        module_body_stream.write("\n")

        # Allocate local transients: those owned by this subgraph, excluding
        # SDFG-shared transients and anything already passed as a parameter.
        data_to_allocate = (set(subgraph.top_level_transients()) -
                            set(sdfg.shared_transients()) -
                            set([p[1] for p in parameters]))
        allocated = set()
        for node in subgraph.nodes():
            if not isinstance(node, dace.sdfg.nodes.AccessNode):
                continue
            if node.data not in data_to_allocate or node.data in allocated:
                continue
            allocated.add(node.data)
            self._dispatcher.dispatch_allocate(sdfg, state, state_id, node,
                                               node.desc(sdfg), module_stream,
                                               module_body_stream)

        self._dispatcher.dispatch_subgraph(sdfg,
                                           subgraph,
                                           state_id,
                                           module_stream,
                                           module_body_stream,
                                           skip_entry_node=False)

        module_stream.write(module_body_stream.getvalue(), sdfg, state_id)
        module_stream.write("}\n\n")

        self._dispatcher.defined_vars.exit_scope(subgraph)
Esempio n. 19
0
def generate_boundary_conditions(node, shape, field_accesses, field_to_desc,
                                 iterator_mapping):
    """Emit boundary-handling Python code for every input access of a stencil.

    For each access of each input connector, builds a conditional assignment
    that substitutes a boundary value when the access would fall outside the
    iteration domain, according to the node's boundary conditions ("copy",
    "constant", or "shrink").

    Returns a tuple ``(boundary_code, oob_cond)`` where ``boundary_code`` is
    the generated code string and ``oob_cond`` is the set of conditions under
    which the output must not be written.
    """
    boundary_code = ""
    # Guard conditions under which the output should not be written.
    out_of_bounds = set()
    for field_name in node.in_connectors:
        accesses = field_accesses[field_name]
        dtype = field_to_desc[field_name].dtype
        veclen = dtype.veclen
        iterators = iterator_mapping[field_name]
        num_dims = sum(iterators, 0)
        # Visit every distinct access offset of this field.
        for indices, memlet_name in accesses.items():
            if len(indices) != num_dims:
                raise ValueError(f"Access {field_name}[{indices}] inconsistent "
                                 f"with iterator mapping {iterators}.")
            guard_terms = set()
            global_terms = set()
            last = len(indices) - 1
            for i, offset in enumerate(indices):
                if offset == 0:
                    continue
                # The innermost index of a vectorized field must account for
                # the unrolled lane index.
                vectorized = (i == last and dtype.veclen > 1)
                unroll_boundary = (f"*{dtype.veclen} + i_unroll"
                                   if vectorized else "")
                # Kept for parity with the boundary expression; unused below.
                unroll_write = (f"*{dtype.veclen} - i_unroll"
                                if vectorized else "")
                if offset < 0:
                    guard_terms.add(
                        f"_i{i}{unroll_boundary} < {sym2cpp(-offset)}")
                    if i != last:
                        global_terms.add(f"_i{i} < {sym2cpp(-offset)}")
                    elif offset <= -veclen:
                        global_terms.add(
                            f"_i{i} < {sym2cpp(-offset // veclen)}")
                else:  # offset > 0
                    bound = shape[i] - offset
                    guard_terms.add(
                        f"_i{i}{unroll_boundary} >= {sym2cpp(bound)}")
                    if i != last:
                        global_terms.add(f"_i{i} >= {sym2cpp(bound)}")
                    elif offset >= veclen:
                        global_terms.add(
                            f"_i{i} >= {sym2cpp(bound // veclen)}")
            if not guard_terms:
                # Interior access: plain pass-through assignment.
                boundary_code += f"{memlet_name} = _{memlet_name}\n"
                continue
            if field_name in node.boundary_conditions:
                bc = node.boundary_conditions[field_name]
            else:
                bc = {"btype": "shrink"}
            btype = bc["btype"]
            if btype == "copy":
                center_memlet = accesses[center]
                boundary_val = f"_{center_memlet}"
            elif btype == "constant":
                boundary_val = bc["value"]
            elif btype == "shrink":
                # Emit a sentinel junk value; the user is responsible for
                # ignoring shrunken outputs.
                if np.issubdtype(dtype.type, np.floating):
                    boundary_val = np.finfo(dtype.type).min
                else:
                    # If not a float, assume it's some kind of integer
                    boundary_val = np.iinfo(dtype.type).min
                # Suppress writing the output under these conditions.
                out_of_bounds |= global_terms
            else:
                raise ValueError(
                    f"Unsupported boundary condition type: {btype}")
            boundary_code += (
                f"{memlet_name} = {boundary_val} "
                f"if {' or '.join(sorted(guard_terms))} else _{memlet_name}\n")
    return boundary_code, out_of_bounds
Esempio n. 20
0
    def copy_memory(self, sdfg: SDFG, dfg: SDFGState, state_id: int,
                    src_node: nodes.Node, dst_node: nodes.Node,
                    edge: gr.MultiConnectorEdge[mm.Memlet],
                    function_stream: CodeIOStream,
                    callsite_stream: CodeIOStream):
        """Generate copy code for an edge inside an SVE vectorized scope.

        Tasklet sources copy the shared register; AccessNode sources with a
        vector destination connector emit an SVE load (``svld1`` for unit
        stride, ``svld1_gather_index`` otherwise). Stream sources are not
        implemented, and every other case is delegated to the CPU codegen.
        """
        # We should always be in an SVE scope
        scope = util.get_sve_scope(sdfg, dfg, dst_node)
        if scope is None:
            raise NotImplementedError('Not in an SVE scope')

        # Type of the destination connector determines the copy strategy.
        in_conn = dst_node.in_connectors[edge.dst_conn]

        if isinstance(src_node, dace.nodes.Tasklet):
            # Copy from tasklet is just copying the shared register
            # Use defined_vars to get the C++ type of the shared register
            callsite_stream.write(
                f'{self.dispatcher.defined_vars.get(edge.data.data)[1]} {edge.dst_conn} = {edge.data.data};'
            )
            return

        if not isinstance(src_node, dace.nodes.AccessNode):
            raise util.NotSupportedError(
                'Copy neither from Tasklet nor AccessNode')

        src_desc = src_node.desc(sdfg)

        if isinstance(src_desc, dace.data.Stream):
            # A copy from a stream will trigger a vector pop
            # NOTE: everything below the raise (the quoted draft included) is
            # intentionally unreachable until the FIXME is resolved.
            raise NotImplementedError()

            # FIXME: Issue when we can pop different amounts of data!
            # If we limit to the smallest amount, certain data will be lost (never processed)
            """
            # SVE register where the stream will be popped to
            self.create_empty_definition(in_conn, edge, callsite_stream, output=True)

            var_name = edge.dst_conn

            callsite_stream.write(
                f'{util.TYPE_TO_SVE[in_conn.type]} {var_name};')

            callsite_stream.write('{')
            callsite_stream.write('// Stream pop')

            # Pop into local buffer
            # 256 // in_conn.vtype.bytes
            n_vec = f'{util.REGISTER_BYTE_SIZE} / {in_conn.vtype.bytes}'
            callsite_stream.write(f'{in_conn.vtype.ctype} __tmp[{n_vec}];')
            callsite_stream.write(
                f'size_t __cnt = {edge.data.data}.pop_try(__tmp, {n_vec});')

            # Limit the loop predicate
            loop_pred = util.get_loop_predicate(sdfg, dfg, dst_node)
            callsite_stream.write(
                f'{loop_pred} = svand_z({loop_pred}, {loop_pred}, svwhilelt_b{in_conn.vtype.bytes * 8}(0ll, __cnt));')

            # Transfer to register
            callsite_stream.write(f'{var_name} = svld1({loop_pred}, __tmp);')

            callsite_stream.write('}')
            """
            return

        if isinstance(in_conn, dtypes.vector):
            # Copy from vector, so we can use svld

            if in_conn.type not in util.TYPE_TO_SVE:
                raise NotImplementedError(
                    f'Data type {in_conn.type} not supported')

            # NOTE(review): `dtypes.vector` here is a type class, not a
            # DefinedType enum member as passed elsewhere in this file —
            # confirm this is what defined_vars.add expects.
            self.dispatcher.defined_vars.add(edge.dst_conn, dtypes.vector,
                                             in_conn.ctype)

            # Determine the stride of the load and use a gather if applicable
            stride = self.get_load_stride(sdfg, dfg, dst_node, edge.data)

            # First part of the declaration is `type name`
            load_lhs = '{} {}'.format(util.TYPE_TO_SVE[in_conn.type],
                                      edge.dst_conn)

            # Cast 64-bit integer pointers explicitly; the intrinsics expect
            # (u)int64_t rather than (unsigned) long long.
            ptr_cast = ''
            if in_conn.type == np.int64:
                ptr_cast = '(int64_t*) '
            elif in_conn.type == np.uint64:
                ptr_cast = '(uint64_t*) '

            # Regular load and gather share the first arguments
            load_args = '{}, {}'.format(
                util.get_loop_predicate(sdfg, dfg, dst_node), ptr_cast +
                cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer))

            if stride == 1:
                # Contiguous data: predicated unit-stride load.
                callsite_stream.write('{} = svld1({});'.format(
                    load_lhs, load_args))
            else:
                # Strided data: gather with a constant-stride index vector.
                callsite_stream.write(
                    '{} = svld1_gather_index({}, svindex_s{}(0, {}));'.format(
                        load_lhs, load_args,
                        util.get_base_type(in_conn).bytes * 8, sym2cpp(stride)))
        else:
            # Any other copy (e.g. pointer or scalar) is handled by the default CPU codegen
            self.cpu_codegen.copy_memory(sdfg, dfg, state_id, src_node,
                                         dst_node, edge, function_stream,
                                         callsite_stream)
Esempio n. 21
0
    def generate_read(self, sdfg: SDFG, state: SDFGState, map: nodes.Map, edge: graph.MultiConnectorEdge[mm.Memlet],
                      code: CodeIOStream):
        """
            Responsible for generating code for reads into a Tasklet, given the ingoing edge.

            Handles three source kinds: Tasklet (read the shared register,
            broadcasting scalars into vectors when needed), Array access
            (pointer reference, SVE vector load/gather, or plain scalar read),
            and Scalar access (register read/broadcast). Writes the generated
            C++ into ``code``.
        """
        # Edges without a destination connector carry no data to read.
        if edge.dst_conn is None:
            return
        # The true data source is the origin of the full memlet path.
        src_node = state.memlet_path(edge)[0].src
        dst_type = edge.dst.in_connectors[edge.dst_conn]
        dst_name = edge.dst_conn
        if isinstance(src_node, nodes.Tasklet):
            ##################
            # Code->Code edges
            src_type = edge.src.out_connectors[edge.src_conn]
            if util.is_vector(src_type) and util.is_vector(dst_type):
                # Directly read from shared vector register
                code.write(f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = {edge.data.data};')
            elif util.is_scalar(src_type) and util.is_scalar(dst_type):
                # Directly read from shared scalar register
                code.write(f'{dst_type} {dst_name} = {edge.data.data};')
            elif util.is_scalar(src_type) and util.is_vector(dst_type):
                # Scalar broadcast from shared scalar register
                code.write(
                    f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = svdup_{util.TYPE_TO_SVE_SUFFIX[dst_type.type]}({edge.data.data});'
                )
            else:
                # Vector -> scalar narrowing is not possible here.
                raise util.NotSupportedError('Unsupported Code->Code edge')
        elif isinstance(src_node, nodes.AccessNode):
            ##################
            # Read from AccessNode
            # NOTE(review): descriptors that are neither Array nor Scalar
            # (e.g. Stream) silently generate no code here — confirm intended.
            desc = src_node.desc(sdfg)
            if isinstance(desc, data.Array):
                # Copy from array
                if util.is_pointer(dst_type):
                    ##################
                    # Pointer reference
                    code.write(
                        f'{dst_type} {dst_name} = {cpp.cpp_ptr_expr(sdfg, edge.data, None, codegen=self.frame)};')
                elif util.is_vector(dst_type):
                    ##################
                    # Vector load

                    # Stride of the accessed dimension decides between a
                    # contiguous load and a gather.
                    stride = edge.data.get_stride(sdfg, map)

                    # First part of the declaration is `type name`
                    load_lhs = '{} {}'.format(util.TYPE_TO_SVE[dst_type.type], dst_name)

                    # long long issue casting
                    ptr_cast = ''
                    if dst_type.type == np.int64:
                        ptr_cast = '(int64_t*) '
                    elif dst_type.type == np.uint64:
                        ptr_cast = '(uint64_t*) '

                    # Regular load and gather share the first arguments
                    load_args = '{}, {}'.format(
                        util.get_loop_predicate(sdfg, state, edge.dst),
                        ptr_cast + cpp.cpp_ptr_expr(sdfg, edge.data, DefinedType.Pointer, codegen=self.frame))

                    if stride == 1:
                        code.write('{} = svld1({});'.format(load_lhs, load_args))
                    else:
                        # Gather with constant-stride index vector.
                        code.write('{} = svld1_gather_index({}, svindex_s{}(0, {}));'.format(
                            load_lhs, load_args,
                            util.get_base_type(dst_type).bytes * 8, sym2cpp(stride)))
                else:
                    ##################
                    # Scalar read from array
                    code.write(f'{dst_type} {dst_name} = {cpp.cpp_array_expr(sdfg, edge.data, codegen=self.frame)};')
            elif isinstance(desc, data.Scalar):
                # Refer to shared variable
                src_type = desc.dtype
                if util.is_vector(src_type) and util.is_vector(dst_type):
                    # Directly read from shared vector register
                    code.write(f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = {edge.data.data};')
                elif util.is_scalar(src_type) and util.is_scalar(dst_type):
                    # Directly read from shared scalar register
                    code.write(f'{dst_type} {dst_name} = {edge.data.data};')
                elif util.is_scalar(src_type) and util.is_vector(dst_type):
                    # Scalar broadcast from shared scalar register
                    code.write(
                        f'{util.TYPE_TO_SVE[dst_type.type]} {dst_name} = svdup_{util.TYPE_TO_SVE_SUFFIX[dst_type.type]}({edge.data.data});'
                    )
                else:
                    raise util.NotSupportedError('Unsupported Scalar->Code edge')
        else:
            raise util.NotSupportedError('Only copy from Tasklets and AccessNodes is supported')
Esempio n. 22
0
    def memlet_view_ctor(self, sdfg, memlet, direction):
        """Build the C++ constructor expression for an immaterial array view.

        Translates ``memlet``'s subset into the parameter list of a
        ``dace::ArrayViewImmaterialIn/Out[Skip]`` constructor. ``direction``
        selects between the "in" and "Out" variants; a non-unit stride in any
        range switches to the ``Skip`` variant.

        :return: The full constructor expression as a string.
        :raises ValueError: If the memlet refers to a Scalar.
        :raises NotImplementedError: For subviews of more than one dimension.
        :raises RuntimeError: For unknown subset types.
        """
        useskip = False
        memlet_params = []

        memlet_name = memlet.data
        if isinstance(sdfg.arrays[memlet.data], data.Scalar):
            raise ValueError("This should never have happened")

        if isinstance(memlet.subset, subsets.Indices):
            # Compute address
            memlet_params.append(cpp_array_expr(sdfg, memlet, False))
            dims = 0

        elif isinstance(memlet.subset, subsets.Range):
            dims = len(memlet.subset.ranges)
            #memlet_params.append("")

            # Dimensions to remove from view (due to having one value)
            indexdims = []
            nonIndexDims = []

            # Classify each range: single-element ranges become index
            # dimensions; anything else (including symbolically-sized ranges)
            # stays a view dimension.
            for dim, (rb, re, rs) in enumerate(memlet.subset.ranges):
                if rs != 1:
                    useskip = True
                try:
                    if (re - rb) == 0:
                        indexdims.append(dim)
                    else:
                        nonIndexDims.append(dim)
                except TypeError:  # cannot determine truth value of Relational
                    nonIndexDims.append(dim)

            if len(nonIndexDims) > 1 and len(indexdims) > 0:
                raise NotImplementedError(
                    'subviews of more than one dimension ' + 'not implemented')
            elif len(
                    nonIndexDims) == 1 and len(indexdims) > 0:  # One dimension
                indexdim = nonIndexDims[0]

                # NOTE(review): both branches below append to
                # memlet_params[-1], but memlet_params is empty at this point
                # (the initial append("") above is commented out) — this looks
                # like it would raise IndexError; confirm against callers.
                # Contiguous dimension
                if indexdim == dims - 1:
                    memlet_params[-1] += ' + %s' % cpp_array_expr(
                        sdfg, memlet, False)
                    memlet_params.append(
                        '0, %s' % (sym2cpp(memlet.subset.ranges[-1][1] -
                                           memlet.subset.ranges[-1][0])))
                else:  # Non-contiguous dimension
                    useskip = True
                    memlet_params[-1] += ' + %s' % cpp_array_expr(
                        sdfg, memlet, False)
                    memlet_range = memlet.subset.ranges[indexdim]

                    memlet_stride = sdfg.arrays[memlet.data].strides[indexdim]
                    memlet_stride = sym2cpp(memlet_stride)

                    memlet_params.append(
                        '0, %s, %s' %
                        (sym2cpp(memlet_range[1] - memlet_range[0]),
                         sym2cpp(memlet_stride)))

                # Subtract index dimensions from array dimensions
                dims -= len(indexdims)

            elif len(indexdims) == 0:
                # Full view: one (offset, size[, skip]) triple per dimension.
                for (rb, re, rs), s in zip(memlet.subset.ranges,
                                           sdfg.arrays[memlet.data].shape):
                    if useskip:
                        memlet_params.append(
                            '%s, %s, %s' %
                            (cppunparse.pyexpr2cpp(symbolic.symstr(rb)),
                             cppunparse.pyexpr2cpp(symbolic.symstr(s)),
                             cppunparse.pyexpr2cpp(symbolic.symstr(rs))))
                    else:
                        memlet_params.append(
                            '%s, %s' %
                            (cppunparse.pyexpr2cpp(symbolic.symstr(rb)),
                             cppunparse.pyexpr2cpp(symbolic.symstr(s))))
            elif len(nonIndexDims) == 0:  # Scalar view
                if len(memlet_params) > 0:
                    # Compute address
                    memlet_params[-1] += ' + ' + cpp_array_expr(
                        sdfg, memlet, False)
                else:
                    memlet_params.append(cpp_array_expr(sdfg, memlet, False))
                dims = 0

        else:
            raise RuntimeError('Memlet type "%s" not implemented' %
                               memlet.subset)

        # Dimensionless views use the scalar constructor; otherwise the view
        # carries the bounding-box size as template arguments.
        if dims == 0:
            return 'dace::ArrayViewImmaterial%s%s<%s, %s, int32_t> ("%s", %s)' % (
                'In' if direction == "in" else "Out", 'Skip'
                if useskip else '', sdfg.arrays[memlet.data].dtype.ctype,
                symbolic.symstr(
                    memlet.veclen), memlet.data, ', '.join(memlet_params))
        else:
            return 'dace::ArrayViewImmaterial%s%s<%s, %s, int32_t, %s> ("%s", %s)' % (
                'In' if direction == "in" else "Out", 'Skip'
                if useskip else '', sdfg.arrays[memlet.data].dtype.ctype,
                symbolic.symstr(memlet.veclen), ', '.join([
                    str(s) for s in memlet.subset.bounding_box_size()
                ]), memlet.data, ', '.join(memlet_params))