Example #1
def test_intersects_constant():
    rng1 = subsets.Range([(0, 4, 1)])
    rng2 = subsets.Range([(3, 4, 1)])
    rng3 = subsets.Range([(1, 5, 1)])
    rng4 = subsets.Range([(5, 7, 1)])
    ind1 = subsets.Indices([0])
    ind2 = subsets.Indices([1])
    ind3 = subsets.Indices([5])

    assert subsets.intersects(rng1, rng2) is True
    assert subsets.intersects(rng1, rng3) is True
    assert subsets.intersects(rng1, rng4) is False
    assert subsets.intersects(ind1, rng1) is True
    assert subsets.intersects(rng1, ind2) is True
    assert subsets.intersects(rng1, ind3) is False
Example #2
def cpp_ptr_expr(sdfg,
                 memlet,
                 defined_type,
                 offset=None,
                 relative_offset=True,
                 use_other_subset=False,
                 indices=None,
                 is_write=None):
    """ Converts a memlet to a C++ pointer expression. """
    subset = memlet.subset if not use_other_subset else memlet.other_subset
    s = subset if relative_offset else subsets.Indices(offset)
    o = offset if relative_offset else None
    desc = sdfg.arrays[memlet.data]
    if isinstance(indices, str):
        offset_cppstr = indices
    else:
        offset_cppstr = cpp_offset_expr(desc, s, o, indices=indices)
    dname = ptr(memlet.data, desc)

    if defined_type == DefinedType.ArrayInterface:
        if is_write is None:
            raise ValueError("is_write must be set for ArrayInterface.")
        dname = array_interface_variable(dname, is_write, None)

    if defined_type == DefinedType.Scalar:
        dname = '&' + dname

    if offset_cppstr == '0':
        return dname
    else:
        return '%s + %s' % (dname, offset_cppstr)
Example #3
def cpp_ptr_expr(sdfg,
                 memlet,
                 offset=None,
                 relative_offset=True,
                 use_other_subset=False,
                 indices=None):
    """ Converts a memlet to a C++ pointer expression. """
    subset = memlet.subset if not use_other_subset else memlet.other_subset
    s = subset if relative_offset else subsets.Indices(offset)
    o = offset if relative_offset else None
    if isinstance(indices, str):
        offset_cppstr = indices
    else:
        offset_cppstr = cpp_offset_expr(sdfg.arrays[memlet.data],
                                        s,
                                        o,
                                        indices=indices)
    dname = memlet.data
    if isinstance(sdfg.arrays[dname], data.Scalar):
        dname = '&' + dname

    if offset_cppstr == '0':
        return dname
    else:
        return '%s + %s' % (dname, offset_cppstr)
Example #4
def copy_expr(
    dispatcher,
    sdfg,
    dataname,
    memlet,
    offset=None,
    relative_offset=True,
    packed_types=False,
):
    datadesc = sdfg.arrays[dataname]
    if relative_offset:
        s = memlet.subset
        o = offset
    else:
        if offset is None:
            s = None
        elif not isinstance(offset, subsets.Subset):
            s = subsets.Indices(offset)
        else:
            s = offset
        o = None
    if s is not None:
        offset_cppstr = cpp_offset_expr(datadesc, s, o)
    else:
        offset_cppstr = "0"
    dt = ""

    expr = dataname

    def_type, _ = dispatcher.defined_vars.get(dataname)

    add_offset = offset_cppstr != "0"

    if def_type in [DefinedType.Pointer, DefinedType.ArrayInterface]:
        return "{}{}{}".format(
            dt, expr, " + {}".format(offset_cppstr) if add_offset else "")

    elif def_type == DefinedType.StreamArray:
        return "{}[{}]".format(expr, offset_cppstr)

    elif def_type == DefinedType.FPGA_ShiftRegister:
        return expr

    elif def_type in [DefinedType.Scalar, DefinedType.Stream]:

        if add_offset:
            raise TypeError("Tried to offset address of scalar {}: {}".format(
                dataname, offset_cppstr))

        if def_type == DefinedType.Scalar:
            return "{}&{}".format(dt, expr)
        else:
            return dataname
    else:
        raise NotImplementedError("copy_expr not implemented "
                                  "for connector type: {}".format(def_type))
Example #5
def _ndslice_to_subset(ndslice):
    is_tuple = [isinstance(x, tuple) for x in ndslice]
    if not any(is_tuple):
        return subsets.Indices(ndslice)
    else:
        if not all(is_tuple):
            # If a mix of ranges and indices is found, convert to range
            for i in range(len(ndslice)):
                if not is_tuple[i]:
                    ndslice[i] = (ndslice[i], ndslice[i], 1)
        return subsets.Range(ndslice)
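
A minimal usage sketch of the helper above, with hypothetical inputs (the helper and dace.subsets are assumed to be in scope):

# All entries are plain indices -> an Indices object is returned.
sub1 = _ndslice_to_subset([1, 2, 3])           # subsets.Indices([1, 2, 3])

# Mixed ranges and indices -> the lone index 2 is promoted to the
# degenerate range (2, 2, 1) and a Range is returned.
sub2 = _ndslice_to_subset([(0, 4, 1), 2])      # subsets.Range([(0, 4, 1), (2, 2, 1)])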
Example #6
def cpp_offset_expr(d: data.Data,
                    subset_in: subsets.Subset,
                    offset=None,
                    packed_veclen=1,
                    indices=None):
    """ Creates a C++ expression that can be added to a pointer in order
        to offset it to the beginning of the given subset and offset.
        :param d: The data structure to use for sizes/strides.
        :param subset_in: The subset to offset by.
        :param offset: An additional list of offsets or a Subset object
        :param packed_veclen: If packed types are targeted, specifies the
                              vector length that the final offset should be
                              divided by.
        :param indices: A tuple of indices to use for expression.
        :return: A string in C++ syntax with the correct offset
    """
    subset = copy.deepcopy(subset_in)

    # Offset according to parameters
    if offset is not None:
        if isinstance(offset, subsets.Subset):
            subset.offset(offset, False)
        else:
            subset.offset(subsets.Indices(offset), False)

    # Then, offset according to array
    subset.offset(subsets.Indices(d.offset), False)

    # Obtain start range from offsetted subset
    indices = indices or ([0] * len(d.strides))

    index = subset.at(indices, d.strides)
    if packed_veclen > 1:
        index /= packed_veclen

    return sym2cpp(index)
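
For intuition, the docstring above boils down to a dot product of the subset's (offsetted) start with the array strides, optionally divided by the packed vector length. A plain-Python sketch of that arithmetic, not the DaCe implementation:

# Assumed values for illustration: a 10x20 row-major array, subset starting at (2, 3).
strides = (20, 1)
start = (2, 3)
packed_veclen = 1

linear_offset = sum(i * s for i, s in zip(start, strides))
if packed_veclen > 1:
    linear_offset //= packed_veclen
# linear_offset == 43, so cpp_offset_expr would emit a string such as "43"
# (or a symbolic expression like "2*M + 3" when the strides are symbolic).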
Example #7
def pop_dims(subset, dims):
    popped = []
    if isinstance(subset, subsets.Indices):
        indices = copy.deepcopy(subset.indices)
        for i in dims:
            popped.append(indices.pop(i))
        return subsets.Indices(indices)
    else:
        ranges = copy.deepcopy(subset.ranges)
        tsizes = copy.deepcopy(subset.tile_sizes)
        for i in dims:
            r = ranges.pop(i)
            t = tsizes.pop(i)
            popped.append((r, t))
        new_subset = subsets.Range(ranges)
        new_subset.tile_sizes = tsizes
        return new_subset, popped
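
A small sketch of how pop_dims might be used on a Range (hypothetical; it assumes, as the code above does, that a Range carries both ranges and tile_sizes):

from dace import subsets

rng = subsets.Range([(0, 4, 1), (0, 9, 1), (0, 1, 1)])
reduced, popped = pop_dims(rng, [1])     # drop dimension 1
# reduced is a 2-D Range over the remaining dimensions 0 and 2;
# popped holds the removed (range, tile_size) pairs, e.g. [((0, 9, 1), 1)].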
Example #8
def cpp_array_expr(sdfg,
                   memlet,
                   with_brackets=True,
                   offset=None,
                   relative_offset=True,
                   packed_veclen=1,
                   use_other_subset=False,
                   indices=None):
    """ Converts an Indices/Range object to a C++ array access string. """
    subset = memlet.subset if not use_other_subset else memlet.other_subset
    s = subset if relative_offset else subsets.Indices(offset)
    o = offset if relative_offset else None
    desc = sdfg.arrays[memlet.data]
    offset_cppstr = cpp_offset_expr(desc, s, o, packed_veclen, indices=indices)

    if with_brackets:
        ptrname = ptr(memlet.data, desc)
        return "%s[%s]" % (ptrname, offset_cppstr)
    else:
        return offset_cppstr
Example #9
def compose_and_push_back(first, second, dims=None, popped=None):
    if isinstance(first, subsets.Indices):
        subset = first.new_offset(second, negative=False)
    else:
        subset = first.compose(second)
    if dims and popped and len(dims) == len(popped):
        if isinstance(first, subsets.Indices):
            indices = subset.indices
            for d, p in zip(reversed(dims), reversed(popped)):
                indices.insert(d, p)
            subset = subsets.Indices(indices)
        else:
            ranges = subset.ranges
            tsizes = subset.tile_sizes
            for d, (r, t) in zip(reversed(dims), reversed(popped)):
                ranges.insert(d, r)
                tsizes.insert(d, t)
            subset = subsets.Range(ranges)
            subset.tile_sizes = tsizes
    return subset
Example #10
def test_intersects_symbolic():
    N, M = dace.symbol('N', positive=True), dace.symbol('M', positive=True)
    rng1 = subsets.Range([(0, N - 1, 1), (0, M - 1, 1)])
    rng2 = subsets.Range([(0, 0, 1), (0, 0, 1)])
    rng3_1 = subsets.Range([(N, N, 1), (0, 1, 1)])
    rng3_2 = subsets.Range([(0, 1, 1), (M, M, 1)])
    rng4 = subsets.Range([(N, N, 1), (M, M, 1)])
    rng5 = subsets.Range([(0, 0, 1), (M, M, 1)])
    rng6 = subsets.Range([(0, N, 1), (0, M, 1)])
    rng7 = subsets.Range([(0, N - 1, 1), (N - 1, N, 1)])
    ind1 = subsets.Indices([0, 1])

    assert subsets.intersects(rng1, rng2) is True
    assert subsets.intersects(rng1, rng3_1) is False
    assert subsets.intersects(rng1, rng3_2) is False
    assert subsets.intersects(rng1, rng4) is False
    assert subsets.intersects(rng1, rng5) is False
    assert subsets.intersects(rng6, rng1) is True
    assert subsets.intersects(rng1, rng7) is None
    assert subsets.intersects(rng7, rng1) is None
    assert subsets.intersects(rng1, ind1) is None
    assert subsets.intersects(ind1, rng1) is None
Example #11
File: cpp.py Project: orausch/dace
def copy_expr(
    dispatcher,
    sdfg,
    dataname,
    memlet,
    offset=None,
    relative_offset=True,
    packed_types=False,
):
    datadesc = sdfg.arrays[dataname]
    if relative_offset:
        s = memlet.subset
        o = offset
    else:
        if offset is None:
            s = None
        elif not isinstance(offset, subsets.Subset):
            s = subsets.Indices(offset)
        else:
            s = offset
        o = None
    if s is not None:
        offset_cppstr = cpp_offset_expr(datadesc, s, o,
                                        memlet.veclen if packed_types else 1)
    else:
        offset_cppstr = "0"
    dt = ""

    if memlet.veclen != 1 and not packed_types:
        offset_cppstr = "(%s) / %s" % (offset_cppstr, sym2cpp(memlet.veclen))
        dt = "(dace::vec<%s, %s> *)" % (
            datadesc.dtype.ctype,
            sym2cpp(memlet.veclen),
        )

    expr = dataname

    def_type = dispatcher.defined_vars.get(dataname)

    add_offset = offset_cppstr != "0"

    if def_type == DefinedType.Pointer:
        return "{}{}{}".format(
            dt, expr, " + {}".format(offset_cppstr) if add_offset else "")

    elif def_type == DefinedType.ArrayView:
        return "{}{}.ptr(){}".format(
            dt, expr, " + {}".format(offset_cppstr) if add_offset else "")

    elif def_type == DefinedType.StreamArray:
        return "{}[{}]".format(expr, offset_cppstr)

    elif def_type in [
            DefinedType.Scalar, DefinedType.Stream, DefinedType.StreamView
    ]:

        if add_offset:
            raise TypeError("Tried to offset address of scalar {}: {}".format(
                dataname, offset_cppstr))

        if def_type == DefinedType.Scalar:
            return "{}&{}".format(dt, expr)
        else:
            return dataname

    else:
        raise NotImplementedError("copy_expr not implemented "
                                  "for connector type: {}".format(def_type))
Example #12
def add_indirection_subgraph(sdfg, graph, src, dst, memlet):
    """ Replaces the specified edge in the specified graph with a subgraph that
        implements indirection without nested AST memlet objects. """
    if not isinstance(memlet, astnodes._Memlet):
        raise TypeError("Expected memlet to be astnodes._Memlet")

    indirect_inputs = set()
    indirect_outputs = set()

    # Scheme for multi-array indirection:
    # 1. look for all arrays and accesses, create set of arrays+indices
    #    from which the index memlets will be constructed from
    # 2. each separate array creates a memlet, of which num_accesses = len(set)
    # 3. one indirection tasklet receives them all + original array and
    #    produces the right output index/range memlet
    #########################
    # Step 1
    accesses = OrderedDict()
    newsubset = dcpy(memlet.subset)
    for dimidx, dim in enumerate(memlet.subset):
        # Range/Index disambiguation
        direct_assignment = False
        if not isinstance(dim, tuple):
            dim = [dim]
            direct_assignment = True

        for i, r in enumerate(dim):
            for expr in sympy.preorder_traversal(r):
                if symbolic.is_sympy_userfunction(expr):
                    fname = expr.func.__name__
                    if fname not in accesses:
                        accesses[fname] = []

                    # Replace function with symbol (memlet local name to-be)
                    if expr.args in accesses[fname]:
                        aindex = accesses[fname].index(expr.args)
                        toreplace = 'index_' + fname + '_' + str(aindex)
                    else:
                        accesses[fname].append(expr.args)
                        toreplace = 'index_' + fname + '_' + str(
                            len(accesses[fname]) - 1)

                    if direct_assignment:
                        newsubset[dimidx] = r.subs(expr, toreplace)
                    else:
                        newsubset[dimidx][i] = r.subs(expr, toreplace)
    #########################
    # Step 2
    ind_inputs = {'__ind_' + memlet.local_name}
    ind_outputs = {'lookup'}
    # Add accesses to inputs
    for arrname, arr_accesses in accesses.items():
        for i in range(len(arr_accesses)):
            ind_inputs.add('index_%s_%d' % (arrname, i))

    tasklet = nd.Tasklet("Indirection", ind_inputs, ind_outputs)

    input_index_memlets = []
    for arrname, arr_accesses in accesses.items():
        arr = memlet.otherdeps[arrname]
        for i, access in enumerate(arr_accesses):
            # Memlet to load the indirection index
            indexMemlet = Memlet(arrname, 1, sbs.Indices(list(access)), 1)
            input_index_memlets.append(indexMemlet)
            graph.add_edge(src, None, tasklet, "index_%s_%d" % (arrname, i),
                           indexMemlet)

    #########################
    # Step 3
    # Create new tasklet that will perform the indirection
    indirection_ast = ast.parse("lookup = {arr}[{index}]".format(
        arr='__ind_' + memlet.local_name,
        index=', '.join([symbolic.symstr(s) for s in newsubset])))
    # Conserve line number of original indirection code
    tasklet.code = ast.copy_location(indirection_ast.body[0], memlet.ast)

    # Create transient variable to trigger the indirected load
    if memlet.num_accesses == 1:
        storage = sdfg.add_scalar('__' + memlet.local_name + '_value',
                                  memlet.data.dtype,
                                  transient=True)
    else:
        storage = sdfg.add_array('__' + memlet.local_name + '_value',
                                 memlet.data.dtype,
                                 storage=types.StorageType.Default,
                                 transient=True,
                                 shape=memlet.bounding_box_size())
    indirectRange = sbs.Range([(0, s - 1, 1) for s in storage.shape])
    dataNode = nd.AccessNode('__' + memlet.local_name + '_value')

    # Create memlet that depends on the full array that we look up in
    fullRange = sbs.Range([(0, s - 1, 1) for s in memlet.data.shape])
    fullMemlet = Memlet(memlet.dataname, memlet.num_accesses, fullRange,
                        memlet.veclen)
    graph.add_edge(src, None, tasklet, '__ind_' + memlet.local_name,
                   fullMemlet)

    # Memlet to store the final value into the transient, and to load it into
    # the tasklet that needs it
    indirectMemlet = Memlet('__' + memlet.local_name + '_value',
                            memlet.num_accesses, indirectRange, memlet.veclen)
    graph.add_edge(tasklet, 'lookup', dataNode, None, indirectMemlet)

    valueMemlet = Memlet('__' + memlet.local_name + '_value',
                         memlet.num_accesses, indirectRange, memlet.veclen)
    graph.add_edge(dataNode, None, dst, memlet.local_name, valueMemlet)
Example #13
    def expansion(node, parent_state, parent_sdfg):
        inp_buffer, out_buffer = node.validate(parent_sdfg, parent_state)
        redistr = parent_sdfg.rdistrarrays[node.redistr]
        array_a = parent_sdfg.subarrays[redistr.array_a]
        array_b = parent_sdfg.subarrays[redistr.array_b]

        inp_symbols = [
            symbolic.symbol(f"__inp_s{i}")
            for i in range(len(inp_buffer.shape))
        ]
        out_symbols = [
            symbolic.symbol(f"__out_s{i}")
            for i in range(len(out_buffer.shape))
        ]
        inp_subset = subsets.Indices(inp_symbols)
        out_subset = subsets.Indices(out_symbols)
        inp_offset = cpp.cpp_offset_expr(inp_buffer, inp_subset)
        out_offset = cpp.cpp_offset_expr(out_buffer, out_subset)
        print(inp_offset)
        print(out_offset)
        inp_repl = ""
        for i, s in enumerate(inp_symbols):
            inp_repl += f"int {s} = __state->{node.redistr}_self_src[__idx * {len(inp_buffer.shape)} + {i}];\n"
        out_repl = ""
        for i, s in enumerate(out_symbols):
            out_repl += f"int {s} = __state->{node.redistr}_self_dst[__idx * {len(out_buffer.shape)} + {i}];\n"
        copy_args = ", ".join([
            f"__state->{node.redistr}_self_size[__idx * {len(inp_buffer.shape)} + {i}], {istride}, {ostride}"
            for i, (istride, ostride
                    ) in enumerate(zip(inp_buffer.strides, out_buffer.strides))
        ])

        code = f"""
            int myrank;
            MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
            MPI_Request* req = new MPI_Request[__state->{node._redistr}_sends];
            MPI_Status* status = new MPI_Status[__state->{node._redistr}_sends];
            MPI_Status recv_status;
            if (__state->{array_a.pgrid}_valid) {{
                for (auto __idx = 0; __idx < __state->{node._redistr}_sends; ++__idx) {{
                    // printf("({redistr.array_a} -> {redistr.array_b}) I am rank %d and I send to %d\\n", myrank, __state->{node._redistr}_dst_ranks[__idx]);
                    // fflush(stdout);
                    MPI_Isend(_inp_buffer, 1, __state->{node._redistr}_send_types[__idx], __state->{node._redistr}_dst_ranks[__idx], 0, MPI_COMM_WORLD, &req[__idx]);
                }}
            }}
            if (__state->{array_b.pgrid}_valid) {{
                for (auto __idx = 0; __idx < __state->{node._redistr}_self_copies; ++__idx) {{
                    // printf("({redistr.array_a} -> {redistr.array_b}) I am rank %d and I self-copy\\n", myrank);
                    // fflush(stdout);
                    {inp_repl}
                    {out_repl}
                    dace::CopyNDDynamic<{inp_buffer.dtype.ctype}, 1, false, {len(inp_buffer.shape)}>::Dynamic::Copy(
                        _inp_buffer + {inp_offset}, _out_buffer + {out_offset}, {copy_args}
                    );
                }}
                for (auto __idx = 0; __idx < __state->{node._redistr}_recvs; ++__idx) {{
                    // printf("({redistr.array_a} -> {redistr.array_b}) I am rank %d and I receive from %d\\n", myrank, __state->{node._redistr}_src_ranks[__idx]);
                    // fflush(stdout);
                    MPI_Recv(_out_buffer, 1, __state->{node._redistr}_recv_types[__idx], __state->{node._redistr}_src_ranks[__idx], 0, MPI_COMM_WORLD, &recv_status);
                }}
            }}
            if (__state->{array_a.pgrid}_valid) {{
                MPI_Waitall(__state->{node._redistr}_sends, req, status);
                delete[] req;
                delete[] status;
            }}
            // printf("I am rank %d and I finished the redistribution {redistr.array_a} -> {redistr.array_b}\\n", myrank);
            // fflush(stdout);
            
        """

        tasklet = nodes.Tasklet(node.name,
                                node.in_connectors,
                                node.out_connectors,
                                code,
                                language=dtypes.Language.CPP)
        return tasklet
Example #14
    def apply(self, sdfg):
        def gnode(nname):
            return graph.nodes()[self.subgraph[nname]]

        expr_index = self.expr_index
        graph = sdfg.nodes()[self.state_id]
        tasklet = gnode(MapReduceFusion._tasklet)
        tmap_exit = graph.nodes()[self.subgraph[MapReduceFusion._tmap_exit]]
        in_array = graph.nodes()[self.subgraph[MapReduceFusion._in_array]]
        if expr_index == 0:  # Reduce without outer map
            rmap_entry = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_in_entry]]
        elif expr_index == 1:  # Reduce with outer map
            rmap_out_entry = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_out_entry]]
            rmap_out_exit = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_out_exit]]
            rmap_in_entry = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_in_entry]]
            rmap_tasklet = graph.nodes()[self.subgraph[
                MapReduceFusion._rmap_in_tasklet]]

        if expr_index == 2:
            rmap_cr = graph.nodes()[self.subgraph[MapReduceFusion._reduce]]
        else:
            rmap_cr = graph.nodes()[self.subgraph[MapReduceFusion._rmap_in_cr]]
        out_array = gnode(MapReduceFusion._out_array)

        # Set nodes to remove according to the expression index
        nodes_to_remove = [in_array]
        if expr_index == 0:
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_in_entry))
        elif expr_index == 1:
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_out_entry))
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_in_entry))
            nodes_to_remove.append(gnode(MapReduceFusion._rmap_out_exit))
        else:
            nodes_to_remove.append(gnode(MapReduceFusion._reduce))

        # If no other edges lead to mapexit, remove it. Otherwise, keep
        # it and remove reduction incoming/outgoing edges
        if expr_index != 2 and len(graph.in_edges(tmap_exit)) == 1:
            nodes_to_remove.append(tmap_exit)

        memlet_edge = None
        for edge in graph.in_edges(tmap_exit):
            if edge.data.data == in_array.data:
                memlet_edge = edge
                break
        if memlet_edge is None:
            raise RuntimeError('Reduction memlet cannot be None')

        if expr_index == 0:  # Reduce without outer map
            # Index order does not matter, merge as-is
            pass
        elif expr_index == 1:  # Reduce with outer map
            tmap = tmap_exit.map
            perm_outer, perm_inner = MapReduceFusion.find_permutation(
                tmap, rmap_out_entry.map, rmap_in_entry.map, memlet_edge.data)

            # Split tasklet map into tmap_out -> tmap_in (according to
            # reduction)
            omap = nodes.Map(
                tmap.label + '_nonreduce',
                [p for i, p in enumerate(tmap.params) if i in perm_outer],
                [r for i, r in enumerate(tmap.range) if i in perm_outer],
                tmap.schedule, tmap.unroll, tmap.is_async)
            tmap.params = [
                p for i, p in enumerate(tmap.params) if i in perm_inner
            ]
            tmap.range = [
                r for i, r in enumerate(tmap.range) if i in perm_inner
            ]
            omap_entry = nodes.MapEntry(omap)
            omap_exit = rmap_out_exit
            rmap_out_exit.map = omap

            # Reconnect graph to new map
            tmap_entry = graph.entry_node(tmap_exit)
            tmap_in_edges = list(graph.in_edges(tmap_entry))
            for e in tmap_in_edges:
                nxutil.change_edge_dest(graph, tmap_entry, omap_entry)
            for e in tmap_in_edges:
                graph.add_edge(omap_entry, e.src_conn, tmap_entry, e.dst_conn,
                               copy.copy(e.data))
        elif expr_index == 2:  # Reduce node
            # Find correspondence between map indices and array outputs
            tmap = tmap_exit.map
            perm = MapReduceFusion.find_permutation_reduce(
                tmap, rmap_cr, graph, memlet_edge.data)

            output_subset = [tmap.params[d] for d in perm]
            if len(output_subset) == 0:  # Output is a scalar
                output_subset = [0]

            array_edge = graph.out_edges(rmap_cr)[0]

            # Delete relevant edges and nodes
            graph.remove_edge(memlet_edge)
            graph.remove_nodes_from(nodes_to_remove)

            # Add new edges and nodes
            #   From tasklet to map exit
            graph.add_edge(
                memlet_edge.src, memlet_edge.src_conn, memlet_edge.dst,
                memlet_edge.dst_conn,
                Memlet(out_array.data, memlet_edge.data.num_accesses,
                       subsets.Indices(output_subset), memlet_edge.data.veclen,
                       rmap_cr.wcr, rmap_cr.identity))

            #   From map exit to output array
            graph.add_edge(
                memlet_edge.dst, 'OUT_' + memlet_edge.dst_conn[3:],
                array_edge.dst, array_edge.dst_conn,
                Memlet(array_edge.data.data, array_edge.data.num_accesses,
                       array_edge.data.subset, array_edge.data.veclen,
                       rmap_cr.wcr, rmap_cr.identity))

            return

        # Remove tmp array node prior to the others, so that a new one
        # can be created in its stead (see below)
        graph.remove_node(nodes_to_remove[0])
        nodes_to_remove = nodes_to_remove[1:]

        # Create tasklet -> tmp -> tasklet connection
        tmp = graph.add_array(
            'tmp',
            memlet_edge.data.subset.bounding_box_size(),
            sdfg.arrays[memlet_edge.data.data].dtype,
            transient=True)
        tasklet_tmp_memlet = copy.deepcopy(memlet_edge.data)
        tasklet_tmp_memlet.data = tmp.data
        tasklet_tmp_memlet.subset = ShapeProperty.to_string(tmp.shape)

        # Modify memlet to point to output array
        memlet_edge.data.data = out_array.data

        # Recover reduction axes from CR reduce subset
        reduce_cr_subset = graph.in_edges(rmap_tasklet)[0].data.subset
        reduce_axes = []
        for ind, crvar in enumerate(reduce_cr_subset.indices):
            if '__i' in str(crvar):
                reduce_axes.append(ind)

        # Modify memlet access index by filtering out reduction axes
        if True:  # expr_index == 0:
            newindices = []
            for ind, ovar in enumerate(memlet_edge.data.subset.indices):
                if ind not in reduce_axes:
                    newindices.append(ovar)
        if len(newindices) == 0:
            newindices = [0]

        memlet_edge.data.subset = subsets.Indices(newindices)

        graph.remove_edge(memlet_edge)

        graph.add_edge(memlet_edge.src, memlet_edge.src_conn, tmp,
                       memlet_edge.dst_conn, tasklet_tmp_memlet)

        red_edges = list(graph.in_edges(rmap_tasklet))
        if len(red_edges) != 1:
            raise RuntimeError('CR edge must be unique')

        tmp_tasklet_memlet = copy.deepcopy(tasklet_tmp_memlet)
        graph.add_edge(tmp, None, rmap_tasklet, red_edges[0].dst_conn,
                       tmp_tasklet_memlet)

        for e in graph.edges_between(rmap_tasklet, rmap_cr):
            e.data.subset = memlet_edge.data.subset

        # Move output edges to point directly to CR node
        if expr_index == 1:
            # Set output memlet between CR node and outer reduction map to
            # contain the same subset as the one pointing to the CR node
            for e in graph.out_edges(rmap_cr):
                e.data.subset = memlet_edge.data.subset

            rmap_out = gnode(MapReduceFusion._rmap_out_exit)
            nxutil.change_edge_src(graph, rmap_out, omap_exit)

        # Remove nodes
        graph.remove_nodes_from(nodes_to_remove)

        # For unrelated outputs, connect original output to rmap_out
        if expr_index == 1 and tmap_exit not in nodes_to_remove:
            other_out_edges = list(graph.out_edges(tmap_exit))
            for e in other_out_edges:
                graph.remove_edge(e)
                graph.add_edge(e.src, e.src_conn, omap_exit, None, e.data)
                graph.add_edge(omap_exit, None, e.dst, e.dst_conn,
                               copy.copy(e.data))
Example #15
    def apply(self, sdfg):
        """ The method creates two nested maps. The inner map ranges over the
            reduction axes, while the outer map ranges over the rest of the 
            input dimensions. The inner map contains a trivial tasklet, while
            the outgoing edges copy the reduction WCR.
        """
        graph = sdfg.nodes()[self.state_id]
        red_node = graph.nodes()[self.subgraph[ReduceExpansion._reduce]]

        inputs = []
        in_memlets = []
        for src, _, _, _, memlet in graph.in_edges(red_node):
            if src not in inputs:
                inputs.append(src)
                in_memlets.append(memlet)
        if len(inputs) > 1:
            raise NotImplementedError

        outputs = []
        out_memlets = []
        for _, _, dst, _, memlet in graph.out_edges(red_node):
            if dst not in outputs:
                outputs.append(dst)
                out_memlets.append(memlet)
        if len(outputs) > 1:
            raise NotImplementedError

        axes = red_node.axes
        if axes is None:
            axes = tuple(i for i in range(in_memlets[0].subset.dims()))

        outer_map_range = {}
        inner_map_range = {}
        for idx, r in enumerate(in_memlets[0].subset):
            if idx in axes:
                inner_map_range.update({
                    "__dim_{}".format(str(idx)):
                    subsets.Range.dim_to_string(r)
                })
            else:
                outer_map_range.update({
                    "__dim_{}".format(str(idx)):
                    subsets.Range.dim_to_string(r)
                })

        if len(outer_map_range) > 0:
            outer_map_entry, outer_map_exit = graph.add_map(
                'reduce_outer', outer_map_range, schedule=red_node.schedule)

        inner_map_entry, inner_map_exit = graph.add_map(
            'reduce_inner',
            inner_map_range,
            schedule=(dtypes.ScheduleType.Default
                      if len(outer_map_range) > 0 else red_node.schedule))

        tasklet = graph.add_tasklet(name='red_tasklet',
                                    inputs={'in_1'},
                                    outputs={'out_1'},
                                    code='out_1 = in_1')

        inner_map_entry.in_connectors = {'IN_1'}
        inner_map_entry.out_connectors = {'OUT_1'}

        outer_in_memlet = dcpy(in_memlets[0])

        if len(outer_map_range) > 0:
            outer_map_entry.in_connectors = {'IN_1'}
            outer_map_entry.out_connectors = {'OUT_1'}
            graph.add_edge(inputs[0], None, outer_map_entry, 'IN_1',
                           outer_in_memlet)
        else:
            graph.add_edge(inputs[0], None, inner_map_entry, 'IN_1',
                           outer_in_memlet)

        med_in_memlet = dcpy(in_memlets[0])
        med_in_range = []
        for idx, r in enumerate(med_in_memlet.subset):
            if idx in axes:
                med_in_range.append(r)
            else:
                med_in_range.append(("__dim_{}".format(str(idx)),
                                     "__dim_{}".format(str(idx)), 1))
        med_in_memlet.subset = subsets.Range(med_in_range)
        med_in_memlet.num_accesses = med_in_memlet.subset.num_elements()

        if len(outer_map_range) > 0:
            graph.add_edge(outer_map_entry, 'OUT_1', inner_map_entry, 'IN_1',
                           med_in_memlet)

        inner_in_memlet = dcpy(med_in_memlet)
        inner_in_idx = []
        for idx in range(len(inner_in_memlet.subset)):
            inner_in_idx.append("__dim_{}".format(str(idx)))
        inner_in_memlet.subset = subsets.Indices(inner_in_idx)
        inner_in_memlet.num_accesses = inner_in_memlet.subset.num_elements()
        graph.add_edge(inner_map_entry, 'OUT_1', tasklet, 'in_1',
                       inner_in_memlet)
        inner_map_exit.in_connectors = {'IN_1'}
        inner_map_exit.out_connectors = {'OUT_1'}

        inner_out_memlet = dcpy(out_memlets[0])
        inner_out_idx = []
        for idx, r in enumerate(inner_in_memlet.subset):
            if idx not in axes:
                inner_out_idx.append(r)
        if len(inner_out_idx) == 0:
            inner_out_idx = [0]

        inner_out_memlet.subset = subsets.Indices(inner_out_idx)
        inner_out_memlet.wcr = red_node.wcr
        inner_out_memlet.num_accesses = inner_out_memlet.subset.num_elements()
        graph.add_edge(tasklet, 'out_1', inner_map_exit, 'IN_1',
                       inner_out_memlet)

        outer_out_memlet = dcpy(out_memlets[0])
        outer_out_range = []
        for idx, r in enumerate(outer_out_memlet.subset):
            if idx not in axes:
                outer_out_range.append(r)
        if len(outer_out_range) == 0:
            outer_out_range = [(0, 0, 1)]

        outer_out_memlet.subset = subsets.Range(outer_out_range)
        outer_out_memlet.wcr = red_node.wcr

        if len(outer_map_range) > 0:
            outer_map_exit.in_connectors = {'IN_1'}
            outer_map_exit.out_connectors = {'OUT_1'}
            med_out_memlet = dcpy(inner_out_memlet)
            med_out_memlet.num_accesses = med_out_memlet.subset.num_elements()
            graph.add_edge(inner_map_exit, 'OUT_1', outer_map_exit, 'IN_1',
                           med_out_memlet)

            graph.add_edge(outer_map_exit, 'OUT_1', outputs[0], None,
                           outer_out_memlet)
        else:
            graph.add_edge(inner_map_exit, 'OUT_1', outputs[0], None,
                           outer_out_memlet)

        graph.remove_edge(graph.in_edges(red_node)[0])
        graph.remove_edge(graph.out_edges(red_node)[0])
        graph.remove_node(red_node)
Example #16
    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        if self.expr_index == 0:
            cnode = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._map_entry]]
            node_schedprop = cnode.map
            exit_nodes = graph.exit_nodes(cnode)
        else:
            cnode = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._reduce]]
            node_schedprop = cnode
            exit_nodes = [cnode]

        # Change schedule
        node_schedprop._schedule = dtypes.ScheduleType.GPU_Device
        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._maps_transformed += 1
        # If nested graph is designated as sequential, transform schedules and
        # storage from Default to Sequential/Register
        if self.nested_seq and self.expr_index == 0:
            for node in graph.scope_subgraph(cnode).nodes():
                if isinstance(node, nodes.AccessNode):
                    arr = node.desc(sdfg)
                    if arr.storage == dtypes.StorageType.Default:
                        arr.storage = dtypes.StorageType.Register
                elif isinstance(node, nodes.MapEntry):
                    if node.map.schedule == dtypes.ScheduleType.Default:
                        node.map.schedule = dtypes.ScheduleType.Sequential

        gpu_storage_types = [
            dtypes.StorageType.GPU_Global,
            dtypes.StorageType.GPU_Shared,
            dtypes.StorageType.GPU_Stack,
        ]

        #######################################################
        # Add GPU copies of CPU arrays (i.e., not already on GPU)

        # First, understand which arrays to clone
        all_out_edges = []
        for enode in exit_nodes:
            all_out_edges.extend(list(graph.out_edges(enode)))
        in_arrays_to_clone = set()
        out_arrays_to_clone = set()
        for e in graph.in_edges(cnode):
            data_node = sd.find_input_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                in_arrays_to_clone.add((data_node, e.data))
        for e in all_out_edges:
            data_node = sd.find_output_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                out_arrays_to_clone.add((data_node, e.data))

        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._arrays_removed += len(
                in_arrays_to_clone) + len(out_arrays_to_clone)

        # Second, create a GPU clone of each array
        # TODO: Overapproximate union of memlets
        cloned_arrays = {}
        in_cloned_arraynodes = {}
        out_cloned_arraynodes = {}
        for array_node, memlet in in_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except:
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except:
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        materialize_func=array.materialize_func,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)

            in_cloned_arraynodes[array_node.data] = cloned_node
        for array_node, memlet in out_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except:
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except:
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        materialize_func=array.materialize_func,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)
            cloned_node.setzero = True

            out_cloned_arraynodes[array_node.data] = cloned_node

        # Third, connect the cloned arrays to the originals
        for array_name, node in in_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in graph.in_edges(cnode):
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   newmemlet)

                    for e in graph.bfs_edges(edge.dst, reverse=False):
                        parent, _, _child, _, memlet = e
                        if parent != edge.dst and not in_scope(
                                graph, parent, edge.dst):
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[-1].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.ExitNode, forward=True):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   edge.data)
                    graph.remove_edge(edge)

        for array_name, node in out_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in all_out_edges:
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   newmemlet)

                    end_node = graph.scope_dict()[edge.src]
                    for e in graph.bfs_edges(edge.src, reverse=True):
                        parent, _, _child, _, memlet = e
                        if parent == end_node:
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[0].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.EntryNode,
                                       forward=False):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    edge.data.wcr = None
                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   edge.data)
                    graph.remove_edge(edge)

        # Fourth, replace memlet arrays as necessary
        if self.expr_index == 0:
            scope_subgraph = graph.scope_subgraph(cnode)
            for edge in scope_subgraph.edges():
                if edge.data.data is not None and edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]
Example #17
def _validate_subsets(edge: graph.MultiConnectorEdge,
                      arrays: typing.Dict[str, data.Data],
                      src_name: str = None,
                      dst_name: str = None) -> typing.Tuple[subsets.Subset]:
    """ Extracts and validates src and dst subsets from the edge. """

    # Find src and dst names
    if not src_name and isinstance(edge.src, nodes.AccessNode):
        src_name = edge.src.data
    if not dst_name and isinstance(edge.dst, nodes.AccessNode):
        dst_name = edge.dst.data
    if not src_name and not dst_name:
        raise NotImplementedError

    # Find the src and dst subsets (deep-copy to allow manipulation)
    src_subset = copy.deepcopy(edge.data.src_subset)
    dst_subset = copy.deepcopy(edge.data.dst_subset)

    if not src_subset and not dst_subset:
        # NOTE: This should never happen
        raise NotImplementedError
    # NOTE: If any of the subsets is None, it means that we proceed in
    # experimental mode. The base case here is that we just copy the other
    # subset. However, if we can locate the other array, we check the
    # dimensionality of the subset and we pop or pad indices/ranges accordingly.
    # In that case, we also set the subset to start from 0 in each dimension.
    if not src_subset:
        if src_name:
            desc = arrays[src_name]
            if isinstance(desc, data.View) or edge.data.data == dst_name:
                src_subset = subsets.Range.from_array(desc)
                src_expr = src_subset.num_elements()
                src_expr_exact = src_subset.num_elements_exact()
                dst_expr = dst_subset.num_elements()
                dst_expr_exact = dst_subset.num_elements_exact()
                if (src_expr != dst_expr and symbolic.inequal_symbols(
                        src_expr_exact, dst_expr_exact)):
                    raise ValueError(
                        "Source subset is missing (dst_subset: {}, "
                        "src_shape: {}".format(dst_subset, desc.shape))
            else:
                src_subset = copy.deepcopy(dst_subset)
                padding = len(desc.shape) - len(src_subset)
                if padding != 0:
                    if padding > 0:
                        if isinstance(src_subset, subsets.Indices):
                            indices = [0] * padding + src_subset.indices
                            src_subset = subsets.Indices(indices)
                        elif isinstance(src_subset, subsets.Range):
                            ranges = [(0, 0, 1)] * padding + src_subset.ranges
                            src_subset = subsets.Range(ranges)
                    elif padding < 0:
                        if isinstance(src_subset, subsets.Indices):
                            indices = src_subset.indices[-padding:]
                            src_subset = subsets.Indices(indices)
                        elif isinstance(src_subset, subsets.Range):
                            ranges = src_subset.ranges[-padding:]
                            src_subset = subsets.Range(ranges)
                    src_subset.offset(src_subset, True)
    elif not dst_subset:
        if dst_name:
            desc = arrays[dst_name]
            if isinstance(desc, data.View) or edge.data.data == src_name:
                dst_subset = subsets.Range.from_array(desc)
                src_expr = src_subset.num_elements()
                src_expr_exact = src_subset.num_elements_exact()
                dst_expr = dst_subset.num_elements()
                dst_expr_exact = dst_subset.num_elements_exact()
                if (src_expr != dst_expr and symbolic.inequal_symbols(
                        src_expr_exact, dst_expr_exact)):
                    raise ValueError(
                        "Destination subset is missing (src_subset: {}, "
                        "dst_shape: {}".format(src_subset, desc.shape))
            else:
                dst_subset = copy.deepcopy(src_subset)
                padding = len(desc.shape) - len(dst_subset)
                if padding != 0:
                    if padding > 0:
                        if isinstance(dst_subset, subsets.Indices):
                            indices = [0] * padding + dst_subset.indices
                            dst_subset = subsets.Indices(indices)
                        elif isinstance(dst_subset, subsets.Range):
                            ranges = [(0, 0, 1)] * padding + dst_subset.ranges
                            dst_subset = subsets.Range(ranges)
                    elif padding < 0:
                        if isinstance(dst_subset, subsets.Indices):
                            indices = dst_subset.indices[-padding:]
                            dst_subset = subsets.Indices(indices)
                        elif isinstance(dst_subset, subsets.Range):
                            ranges = dst_subset.ranges[-padding:]
                            dst_subset = subsets.Range(ranges)
                    dst_subset.offset(dst_subset, True)

    return src_subset, dst_subset
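
A small sketch of the padding/popping step described in the NOTE above, using plain subsets objects (hypothetical dimensionalities):

from dace import subsets

src = subsets.Indices([1, 2])

padding = 1                                              # destination has one extra dimension
padded = subsets.Indices([0] * padding + src.indices)    # Indices([0, 1, 2])

padding = -1                                             # destination has one dimension fewer
popped = subsets.Indices(src.indices[-padding:])         # Indices([2])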
Example #18
    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        node_a = self.node_a(sdfg)
        node_b = self.node_b(sdfg)

        # Determine direction of new memlet
        scope_dict = graph.scope_dict()
        propagate_forward = sd.scope_contains_scope(scope_dict, node_a, node_b)

        array = self.array
        if array is None or len(array) == 0:
            array = next(e.data.data
                         for e in graph.edges_between(node_a, node_b)
                         if e.data.data is not None and e.data.wcr is None)

        original_edge = None
        invariant_memlet = None
        for edge in graph.edges_between(node_a, node_b):
            if array == edge.data.data:
                original_edge = edge
                invariant_memlet = edge.data
                break
        if invariant_memlet is None:
            for edge in graph.edges_between(node_a, node_b):
                original_edge = edge
                invariant_memlet = edge.data
                warnings.warn('Array %s not found! Using array %s instead.' %
                              (array, invariant_memlet.data))
                array = invariant_memlet.data
                break
        if invariant_memlet is None:
            raise NameError('Array %s not found!' % array)

        # Add transient array
        new_data, _ = sdfg.add_array('trans_' + invariant_memlet.data, [
            symbolic.overapproximate(r)
            for r in invariant_memlet.bounding_box_size()
        ],
                                     sdfg.arrays[invariant_memlet.data].dtype,
                                     transient=True,
                                     find_new_name=True)
        data_node = nodes.AccessNode(new_data)

        # Store as fields so that other transformations can use them
        self._local_name = new_data
        self._data_node = data_node

        to_data_mm = copy.deepcopy(invariant_memlet)
        from_data_mm = copy.deepcopy(invariant_memlet)
        offset = subsets.Indices([r[0] for r in invariant_memlet.subset])

        # Reconnect, assuming one edge to the access node
        graph.remove_edge(original_edge)
        if propagate_forward:
            graph.add_edge(node_a, original_edge.src_conn, data_node, None,
                           to_data_mm)
            new_edge = graph.add_edge(data_node, None, node_b,
                                      original_edge.dst_conn, from_data_mm)
        else:
            new_edge = graph.add_edge(node_a, original_edge.src_conn,
                                      data_node, None, to_data_mm)
            graph.add_edge(data_node, None, node_b, original_edge.dst_conn,
                           from_data_mm)

        # Offset all edges in the memlet tree (including the new edge)
        for edge in graph.memlet_tree(new_edge):
            edge.data.subset.offset(offset, True)
            edge.data.data = new_data

        return data_node
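
A minimal sketch of the offsetting step at the end, assuming (as the code above suggests) that Subset.offset(other, True) subtracts the given start indices in place:

import copy
from dace import subsets

inv = subsets.Range([(2, 5, 1), (10, 10, 1)])       # stand-in for invariant_memlet.subset
start = subsets.Indices([r[0] for r in inv])        # Indices([2, 10])
local = copy.deepcopy(inv)
local.offset(start, True)                           # roughly [(0, 3, 1), (0, 0, 1)] afterwards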
Example #19
def copy_expr(
    dispatcher,
    sdfg,
    dataname,
    memlet,
    is_write=None,  # Otherwise it's a read
    offset=None,
    relative_offset=True,
    packed_types=False,
):
    datadesc = sdfg.arrays[dataname]
    if relative_offset:
        s = memlet.subset
        o = offset
    else:
        if offset is None:
            s = None
        elif not isinstance(offset, subsets.Subset):
            s = subsets.Indices(offset)
        else:
            s = offset
        o = None
    if s is not None:
        offset_cppstr = cpp_offset_expr(datadesc, s, o)
    else:
        offset_cppstr = "0"
    dt = ""

    expr = ptr(dataname, datadesc)

    def_type, _ = dispatcher.defined_vars.get(dataname)

    add_offset = offset_cppstr != "0"

    if def_type in [DefinedType.Pointer, DefinedType.ArrayInterface]:
        if def_type == DefinedType.ArrayInterface:
            # If this is a view, it has already been renamed
            if not isinstance(datadesc, data.View):
                if is_write is None:
                    raise ValueError(
                        "is_write must be set for ArrayInterface.")
                expr = array_interface_variable(expr, is_write, dispatcher)
        return "{}{}{}".format(
            dt, expr, " + {}".format(offset_cppstr) if add_offset else "")

    elif def_type == DefinedType.StreamArray:
        return "{}[{}]".format(expr, offset_cppstr)

    elif def_type == DefinedType.FPGA_ShiftRegister:
        return expr

    elif def_type in [DefinedType.Scalar, DefinedType.Stream]:

        if add_offset:
            raise TypeError("Tried to offset address of scalar {}: {}".format(
                dataname, offset_cppstr))

        if def_type == DefinedType.Scalar:
            return "{}&{}".format(dt, expr)
        else:
            return dataname
    else:
        raise NotImplementedError("copy_expr not implemented "
                                  "for connector type: {}".format(def_type))