Example 1: StateFusion.can_be_applied
    def can_be_applied(self, graph, expr_index, sdfg, permissive=False):
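        """Check whether ``first_state`` and ``second_state`` can be fused into
        a single state without changing the SDFG semantics, i.e., without
        introducing control-flow ambiguity or data races between the states."""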
        first_state: SDFGState = self.first_state
        second_state: SDFGState = self.second_state

        out_edges = graph.out_edges(first_state)
        in_edges = graph.in_edges(first_state)

        # The first state must have exactly one output edge (whose destination
        # is the second state).
        if len(out_edges) != 1:
            return False
        # If both states have more than one incoming edge, some control flow
        # may become ambiguous
        if len(in_edges) > 1 and graph.in_degree(second_state) > 1:
            return False
        # The interstate edge must not have a condition.
        if not out_edges[0].data.is_unconditional():
            return False
        # The interstate edge may have assignments, as long as there are input
        # edges to the first state that can absorb them.
        if out_edges[0].data.assignments:
            if not in_edges:
                return False
            # Fail if symbol is set before the state to fuse
            new_assignments = set(out_edges[0].data.assignments.keys())
            if any((new_assignments & set(e.data.assignments.keys())) for e in in_edges):
                return False
            # Fail if symbol is used in the dataflow of that state
            if len(new_assignments & first_state.free_symbols) > 0:
                return False
            # Fail if assignments have free symbols that are updated in the
            # first state
            freesyms = out_edges[0].data.free_symbols
            if freesyms and any(n.data in freesyms for n in first_state.nodes()
                                if isinstance(n, nodes.AccessNode) and first_state.in_degree(n) > 0):
                return False
            # Fail if symbols assigned on the first edge are free symbols on the
            # second edge
            symbols_used = set(out_edges[0].data.free_symbols)
            for e in in_edges:
                if e.data.assignments.keys() & symbols_used:
                    return False

        # There can be no state that has output edges pointing to both the
        # first and the second state. Such a case would produce a multigraph.
        for src, _, _ in in_edges:
            for _, dst, _ in graph.out_edges(src):
                if dst == second_state:
                    return False

        if not permissive:
            # Strict mode that inhibits state fusion if Python callbacks are involved
            if Config.get_bool('frontend', 'dont_fuse_callbacks'):
                for node in (first_state.data_nodes() + second_state.data_nodes()):
                    if node.data == '__pystate':
                        return False

            # NOTE: This is a quick fix for MPI Waitall (probably also needed for
            # Wait) until we have a better SDFG representation of the buffer
            # dependencies.
            try:
                from dace.libraries.mpi import Waitall
                next(node for node in first_state.nodes() if isinstance(node, Waitall) or node.label == '_Waitall_')
                return False
            except StopIteration:
                pass
            try:
                from dace.libraries.mpi import Waitall
                next(node for node in second_state.nodes() if isinstance(node, Waitall) or node.label == '_Waitall_')
                return False
            except StopIteration:
                pass

            # If the second state has other input edges, there might be issues.
            # The exception is when neither state contains dataflow, unless the
            # first state is an initial state (in which case the new initial
            # state would be ambiguous).
            first_in_edges = graph.in_edges(first_state)
            second_in_edges = graph.in_edges(second_state)
            if ((not second_state.is_empty() or not first_state.is_empty() or len(first_in_edges) == 0)
                    and len(second_in_edges) != 1):
                return False

            # Get connected components.
            first_cc = [cc_nodes for cc_nodes in nx.weakly_connected_components(first_state._nx)]
            second_cc = [cc_nodes for cc_nodes in nx.weakly_connected_components(second_state._nx)]

            # Find source/sink (data) nodes
            first_input = {node for node in sdutil.find_source_nodes(first_state) if isinstance(node, nodes.AccessNode)}
            first_output = {
                node
                for node in first_state.scope_children()[None]
                if isinstance(node, nodes.AccessNode) and node not in first_input
            }
            second_input = {
                node
                for node in sdutil.find_source_nodes(second_state) if isinstance(node, nodes.AccessNode)
            }
            second_output = {
                node
                for node in second_state.scope_children()[None]
                if isinstance(node, nodes.AccessNode) and node not in second_input
            }

            # Find source/sink (data) nodes by connected component
            first_cc_input = [cc.intersection(first_input) for cc in first_cc]
            first_cc_output = [cc.intersection(first_output) for cc in first_cc]
            second_cc_input = [cc.intersection(second_input) for cc in second_cc]
            second_cc_output = [cc.intersection(second_output) for cc in second_cc]

            # Apply transformation in case all paths to the second state's
            # nodes go through the same access node, which implies sequential
            # behavior in SDFG semantics.
            first_output_names = {node.data for node in first_output}
            second_input_names = {node.data for node in second_input}

            # If any second input appears more than once, fail
            if len(second_input) > len(second_input_names):
                return False

            # If any first output that is an input to the second state
            # appears in more than one CC, fail
            matches = first_output_names & second_input_names
            for match in matches:
                cc_appearances = 0
                for cc in first_cc_output:
                    if len([n for n in cc if n.data == match]) > 0:
                        cc_appearances += 1
                if cc_appearances > 1:
                    return False

            # Recreate fused connected component correspondences, and then
            # check for hazards
            resulting_ccs: List[CCDesc] = StateFusion.find_fused_components(first_cc_input, first_cc_output,
                                                                            second_cc_input, second_cc_output)

            # Check for data races
            for fused_cc in resulting_ccs:
                # Write-Write hazard - data is output of both first and second
                # states, without a read in between
                write_write_candidates = ((fused_cc.first_outputs & fused_cc.second_outputs) - fused_cc.second_inputs)

                # Find the leaf (topological) instances of the matches
                order = [
                    x for x in reversed(list(nx.topological_sort(first_state._nx)))
                    if isinstance(x, nodes.AccessNode) and x.data in fused_cc.first_outputs
                ]
                # Those nodes will be the connection points upon fusion
                match_nodes = {
                    next(n for n in order if n.data == match)
                    for match in (fused_cc.first_outputs
                                  & fused_cc.second_inputs)
                }

                # If we have potential candidates, check if there is a
                # path from the first write to the second write (in that
                # case, there is no hazard):
                for cand in write_write_candidates:
                    nodes_first = [n for n in first_output if n.data == cand]
                    nodes_second = [n for n in second_output if n.data == cand]

                    # If there is a path for the candidate that goes through
                    # the match nodes in both states, there is no conflict
                    fail = False
                    path_found = False
                    for match in match_nodes:
                        for node in nodes_first:
                            path_to = nx.has_path(first_state._nx, node, match)
                            if not path_to:
                                continue
                            path_found = True
                            node2 = next(n for n in second_input if n.data == match.data)
                            if not all(nx.has_path(second_state._nx, node2, n) for n in nodes_second):
                                fail = True
                                break
                        if fail or path_found:
                            break

                    # Check for intersection (if None, fusion is ok)
                    if fail or not path_found:
                        if StateFusion.memlets_intersect(first_state, nodes_first, False, second_state, nodes_second,
                                                         False):
                            return False
                # End of write-write hazard check

                first_inout = fused_cc.first_inputs | fused_cc.first_outputs
                for other_cc in resulting_ccs:
                    # NOTE: Special handling for `other_cc is fused_cc`
                    if other_cc is fused_cc:
                        # Checking for potential Read-Write data races
                        for d in first_inout:
                            if d in other_cc.second_outputs:
                                nodes_second = [n for n in second_output if n.data == d]
                                # Read-Write race
                                if d in fused_cc.first_inputs:
                                    nodes_first = [n for n in first_input if n.data == d]
                                else:
                                    nodes_first = []
                                for n2 in nodes_second:
                                    for e in second_state.in_edges(n2):
                                        path = second_state.memlet_path(e)
                                        src = path[0].src
                                        if src in second_input and src.data in fused_cc.first_outputs:
                                            for n1 in fused_cc.first_output_nodes:
                                                if n1.data == src.data:
                                                    for n0 in nodes_first:
                                                        if not nx.has_path(first_state._nx, n0, n1):
                                                            return False
                        continue
                    # If an input/output of a connected component in the first
                    # state is an output of another connected component in the
                    # second state, we have a potential data race (Read-Write
                    # or Write-Write)
                    for d in first_inout:
                        if d in other_cc.second_outputs:
                            # Check for intersection (if None, fusion is ok)
                            nodes_second = [n for n in second_output if n.data == d]
                            # Read-Write race
                            if d in fused_cc.first_inputs:
                                nodes_first = [n for n in first_input if n.data == d]
                                if StateFusion.memlets_intersect(first_state, nodes_first, True, second_state,
                                                                 nodes_second, False):
                                    return False
                            # Write-Write race
                            if d in fused_cc.first_outputs:
                                nodes_first = [n for n in first_output if n.data == d]
                                if StateFusion.memlets_intersect(first_state, nodes_first, False, second_state,
                                                                 nodes_second, False):
                                    return False
                    # End of data race check

                # Read-after-write dependencies: if there is an output of the
                # second state that is an input of the first, ensure all paths
                # from the input of the first state lead to the output.
                # Otherwise, there may be a RAW due to topological sort or
                # concurrency.
                second_inout = ((fused_cc.first_inputs | fused_cc.first_outputs) & fused_cc.second_outputs)
                for inout in second_inout:
                    nodes_first = [n for n in match_nodes if n.data == inout]
                    if any(first_state.out_degree(n) > 0 for n in nodes_first):
                        return False

                    # If we have potential candidates, check if there is a
                    # path from the first read to the second write (in that
                    # case, there is no hazard):
                    nodes_first = {
                        n
                        for n in fused_cc.first_input_nodes
                        | fused_cc.first_output_nodes if n.data == inout
                    }
                    nodes_second = {n for n in fused_cc.second_output_nodes if n.data == inout}

                    # If there is a path for the candidate that goes through
                    # the match nodes in both states, there is no conflict
                    fail = False
                    path_found = False
                    for match in match_nodes:
                        for node in nodes_first:
                            path_to = nx.has_path(first_state._nx, node, match)
                            if not path_to:
                                continue
                            path_found = True
                            node2 = next(n for n in second_input if n.data == match.data)
                            if not all(nx.has_path(second_state._nx, node2, n) for n in nodes_second):
                                fail = True
                                break
                        if fail or path_found:
                            break

                    # Check for intersection (if None, fusion is ok)
                    if fail or not path_found:
                        if StateFusion.memlets_intersect(first_state, nodes_first, True, second_state, nodes_second,
                                                         False):
                            return False
                # End of read-write hazard check

                # Read-after-write dependencies: if there is more than one first
                # output with the same data, make sure it can be unambiguously
                # connected to the second state
                if (len(fused_cc.first_output_nodes) > len(fused_cc.first_outputs)):
                    for inpnode in fused_cc.second_input_nodes:
                        found = None
                        for outnode in fused_cc.first_output_nodes:
                            if outnode.data != inpnode.data:
                                continue
                            if StateFusion.memlets_intersect(first_state, [outnode], False, second_state, [inpnode],
                                                             True):
                                # If found more than once, either there is a
                                # path from one to another or it is ambiguous
                                if found is not None:
                                    if nx.has_path(first_state._nx, outnode, found):
                                        # Found is a descendant, continue
                                        continue
                                    elif nx.has_path(first_state._nx, found, outnode):
                                        # New node is a descendant, set as found
                                        found = outnode
                                    else:
                                        # No path: ambiguous match
                                        return False
                                found = outnode

        return True
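
A minimal usage sketch, assuming an already-built two-state dace.SDFG named `sdfg`; in practice `can_be_applied` is invoked by the pattern-matching machinery when the transformation is driven through the SDFG API:

from dace.transformation.interstate import StateFusion

# Fuse states repeatedly until no further StateFusion match passes can_be_applied.
applied = sdfg.apply_transformations_repeated(StateFusion)
print('StateFusion applied %d time(s)' % applied)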
Example 2: generate_code
def generate_code(sdfg) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.
        :param sdfg: The SDFG to use
        :return: List of code objects that correspond to files to compile.
    """
    # Before compiling, validate SDFG correctness
    sdfg.validate()

    if Config.get_bool('experimental', 'test_serialization'):
        from dace.sdfg import SDFG
        import filecmp
        sdfg.save('test.sdfg')
        sdfg2 = SDFG.from_file('test.sdfg')
        sdfg2.save('test2.sdfg')
        print('Testing SDFG serialization...')
        if not filecmp.cmp('test.sdfg', 'test2.sdfg'):
            raise RuntimeError(
                'SDFG serialization failed - files do not match')
        os.remove('test.sdfg')
        os.remove('test2.sdfg')

        # Run with the deserialized version
        sdfg = sdfg2

    frame = framecode.DaCeCodeGenerator()
    # Instantiate all targets (which register themselves with the frame code generator)
    targets = {
        name: STRING_TO_TARGET[name](frame, sdfg)
        for name in _TARGET_REGISTER_ORDER
    }

    # Instantiate all instrumentation providers in SDFG
    frame._dispatcher.instrumentation[
        dtypes.InstrumentationType.No_Instrumentation] = None
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            frame._dispatcher.instrumentation[node.instrument] = \
                INSTRUMENTATION_PROVIDERS[node.instrument]
        elif hasattr(node, 'consume'):
            frame._dispatcher.instrumentation[node.consume.instrument] = \
                INSTRUMENTATION_PROVIDERS[node.consume.instrument]
        elif hasattr(node, 'map'):
            frame._dispatcher.instrumentation[node.map.instrument] = \
                INSTRUMENTATION_PROVIDERS[node.map.instrument]
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    global_code, frame_code, used_targets = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name, global_code + frame_code, 'cpp', cpu.CPUCodeGen,
                   'Frame')
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    return target_objects
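
A hedged usage sketch of the function above, assuming `sdfg` is a valid dace.SDFG (the `name` and `language` attributes of CodeObject are assumed from the constructor call in the listing):

code_objects = generate_code(sdfg)
for obj in code_objects:
    # Each code object corresponds to one file to compile: the frame code plus
    # one object per code-generation target that was actually used.
    print(obj.name, obj.language)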
Example 3: timethis
def timethis(sdfg, title, flop_count, f, *args, **kwargs):
    """ Runs a function multiple (`DACE_treps`) times, logs the running times 
        to a file, and prints the median time (with FLOPs if given).
        :param sdfg: The SDFG belonging to the measurement.
        :param title: A title of the measurement.
        :param flop_count: Number of floating point operations in `program`.
                           If greater than zero, produces a median FLOPS 
                           report.
        :param f: The function to measure.
        :param args: Arguments to invoke the function with.
        :param kwargs: Keyword arguments to invoke the function with.
        :return: Latest return value of the function.
    """

    start = timer()
    REPS = int(Config.get('treps'))

    times = [start] * (REPS + 1)
    ret = None
    print('\nProfiling...')
    iterator = range(REPS)
    if Config.get_bool('profiling_status'):
        try:
            from tqdm import tqdm
            iterator = tqdm(iterator, desc="Profiling", file=sys.stdout)
        except ImportError:
            print(
                'WARNING: Cannot show profiling progress, missing optional '
                'dependency tqdm...\n\tTo see a live progress bar please install '
                'tqdm (`pip install tqdm`)\n\tTo disable this feature (and '
                'this warning) set `profiling_status` to false in the dace '
                'config (~/.dace.conf).')
    for i in iterator:
        # Call function
        ret = f(*args, **kwargs)
        times[i + 1] = timer()

    diffs = np.array([(times[i] - times[i - 1]) for i in range(1, REPS + 1)])

    problem_size = sys.argv[1] if len(sys.argv) >= 2 else 0

    profiling_dir = os.path.join(sdfg.build_folder, 'profiling')
    os.makedirs(profiling_dir, exist_ok=True)
    timestamp_string = str(int(time.time() * 1000))
    outfile_path = os.path.join(profiling_dir,
                                'results-' + timestamp_string + '.csv')

    with open(outfile_path, 'w') as f:
        f.write('Program,Optimization,Problem_Size,Runtime_sec\n')
        for d in diffs:
            f.write('%s,%s,%s,%.8f\n' % (sdfg.name, title, problem_size, d))

    if flop_count > 0:
        gflops_arr = (flop_count / diffs) * 1e-9
        time_secs = np.median(diffs)
        GFLOPs = (flop_count / time_secs) * 1e-9
        print(title, GFLOPs, 'GFLOP/s       (', time_secs * 1000, 'ms)')
    else:
        time_secs = np.median(diffs)
        print(title, time_secs * 1000, 'ms')

    return ret
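
A hedged example of how the helper above might be invoked; `N`, `A`, `B`, and `C` are placeholder problem-size and array arguments, and 2*N**3 is used only as an illustrative FLOP count for a dense matrix multiplication:

# Compile the SDFG into a callable object and profile it DACE_treps times.
csdfg = sdfg.compile()
result = timethis(sdfg, 'matmul-baseline', 2 * N ** 3, csdfg, A=A, B=B, C=C)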
Example 4: validate_state
def validate_state(state: 'dace.sdfg.SDFGState',
                   state_id: int = None,
                   sdfg: 'dace.sdfg.SDFG' = None,
                   symbols: Dict[str, dtypes.typeclass] = None):
    """ Verifies the correctness of an SDFG state by applying multiple
        tests. Raises an InvalidSDFGError with the erroneous node on
        failure.
    """
    # Avoid import loops
    from dace.sdfg import SDFG
    from dace.config import Config
    from dace.sdfg import nodes as nd
    from dace.sdfg.scope import scope_contains_scope
    from dace import data as dt
    from dace import subsets as sbs

    sdfg = sdfg or state.parent
    state_id = state_id or sdfg.node_id(state)
    symbols = symbols or {}

    if not dtypes.validate_name(state._label):
        raise InvalidSDFGError("Invalid state name", sdfg, state_id)

    if state._parent != sdfg:
        raise InvalidSDFGError("State does not point to the correct "
                               "parent", sdfg, state_id)

    # Unreachable
    ########################################
    if (sdfg.number_of_nodes() > 1 and sdfg.in_degree(state) == 0
            and sdfg.out_degree(state) == 0):
        raise InvalidSDFGError("Unreachable state", sdfg, state_id)

    for nid, node in enumerate(state.nodes()):
        # Node validation
        try:
            node.validate(sdfg, state)
        except InvalidSDFGError:
            raise
        except Exception as ex:
            raise InvalidSDFGNodeError("Node validation failed: " + str(ex),
                                       sdfg, state_id, nid) from ex

        # Isolated nodes
        ########################################
        if state.in_degree(node) + state.out_degree(node) == 0:
            # One corner case: OK if this is a code node
            if isinstance(node, nd.CodeNode):
                pass
            else:
                raise InvalidSDFGNodeError("Isolated node", sdfg, state_id,
                                           nid)

        # Scope tests
        ########################################
        if isinstance(node, nd.EntryNode):
            try:
                state.exit_node(node)
            except StopIteration:
                raise InvalidSDFGNodeError(
                    "Entry node does not have matching "
                    "exit node",
                    sdfg,
                    state_id,
                    nid,
                )

        if isinstance(node, (nd.EntryNode, nd.ExitNode)):
            for iconn in node.in_connectors:
                if (iconn is not None and iconn.startswith("IN_")
                        and ("OUT_" + iconn[3:]) not in node.out_connectors):
                    raise InvalidSDFGNodeError(
                        "No match for input connector %s in output "
                        "connectors" % iconn,
                        sdfg,
                        state_id,
                        nid,
                    )
            for oconn in node.out_connectors:
                if (oconn is not None and oconn.startswith("OUT_")
                        and ("IN_" + oconn[4:]) not in node.in_connectors):
                    raise InvalidSDFGNodeError(
                        "No match for output connector %s in input "
                        "connectors" % oconn,
                        sdfg,
                        state_id,
                        nid,
                    )

        # Node-specific tests
        ########################################
        if isinstance(node, nd.AccessNode):
            if node.data not in sdfg.arrays:
                raise InvalidSDFGNodeError(
                    "Access node must point to a valid array name in the SDFG",
                    sdfg,
                    state_id,
                    nid,
                )
            arr = sdfg.arrays[node.data]

            # Verify View references
            if isinstance(arr, dt.View):
                from dace.sdfg import utils as sdutil  # Avoid import loops
                if sdutil.get_view_edge(state, node) is None:
                    raise InvalidSDFGNodeError(
                        "Ambiguous or invalid edge to/from a View access node",
                        sdfg, state_id, nid)

            # Find uninitialized transients
            if (arr.transient and state.in_degree(node) == 0
                    and state.out_degree(node) > 0
                    # Streams do not need to be initialized
                    and not isinstance(arr, dt.Stream)):
                # Find other instances of node in predecessor states
                states = sdfg.predecessor_states(state)
                input_found = False
                for s in states:
                    for onode in s.nodes():
                        if (isinstance(onode, nd.AccessNode)
                                and onode.data == node.data):
                            if s.in_degree(onode) > 0:
                                input_found = True
                                break
                    if input_found:
                        break
                if not input_found and node.setzero == False:
                    warnings.warn(
                        'WARNING: Use of uninitialized transient "%s" in state %s'
                        % (node.data, state.label))

            # Find writes to input-only arrays
            only_empty_inputs = all(e.data.is_empty()
                                    for e in state.in_edges(node))
            if (not arr.transient) and (not only_empty_inputs):
                nsdfg_node = sdfg.parent_nsdfg_node
                if nsdfg_node is not None:
                    if node.data not in nsdfg_node.out_connectors:
                        raise InvalidSDFGNodeError(
                            'Data descriptor %s is '
                            'written to, but only given to nested SDFG as an '
                            'input connector' % node.data, sdfg, state_id, nid)

        if (isinstance(node, nd.ConsumeEntry)
                and "IN_stream" not in node.in_connectors):
            raise InvalidSDFGNodeError(
                "Consume entry node must have an input stream", sdfg, state_id,
                nid)
        if (isinstance(node, nd.ConsumeEntry)
                and "OUT_stream" not in node.out_connectors):
            raise InvalidSDFGNodeError(
                "Consume entry node must have an internal stream",
                sdfg,
                state_id,
                nid,
            )

        # Connector tests
        ########################################
        # Check for duplicate connector names (unless it's a nested SDFG)
        if (len(node.in_connectors.keys() & node.out_connectors.keys()) > 0
                and not isinstance(node, (nd.NestedSDFG, nd.LibraryNode))):
            dups = node.in_connectors.keys() & node.out_connectors.keys()
            raise InvalidSDFGNodeError("Duplicate connectors: " + str(dups),
                                       sdfg, state_id, nid)

        # Check for connectors that are also array/symbol names
        if isinstance(node, nd.Tasklet):
            for conn in node.in_connectors.keys():
                if conn in sdfg.arrays or conn in symbols:
                    raise InvalidSDFGNodeError(
                        f"Input connector {conn} already "
                        "defined as array or symbol", sdfg, state_id, nid)
            for conn in node.out_connectors.keys():
                if conn in sdfg.arrays or conn in symbols:
                    raise InvalidSDFGNodeError(
                        f"Output connector {conn} already "
                        "defined as array or symbol", sdfg, state_id, nid)

        # Check for dangling connectors (incoming)
        for conn in node.in_connectors:
            incoming_edges = 0
            for e in state.in_edges(node):
                # Connector found
                if e.dst_conn == conn:
                    incoming_edges += 1

            if incoming_edges == 0:
                raise InvalidSDFGNodeError("Dangling in-connector %s" % conn,
                                           sdfg, state_id, nid)
            # Connectors may have only one incoming edge
            # Due to input connectors of scope exit, this is only correct
            # in some cases:
            if incoming_edges > 1 and not isinstance(node, nd.ExitNode):
                raise InvalidSDFGNodeError(
                    "Connector '%s' cannot have more "
                    "than one incoming edge, found %d" %
                    (conn, incoming_edges),
                    sdfg,
                    state_id,
                    nid,
                )

        # Check for dangling connectors (outgoing)
        for conn in node.out_connectors:
            outgoing_edges = 0
            for e in state.out_edges(node):
                # Connector found
                if e.src_conn == conn:
                    outgoing_edges += 1

            if outgoing_edges == 0:
                raise InvalidSDFGNodeError("Dangling out-connector %s" % conn,
                                           sdfg, state_id, nid)

            # In case of scope exit or code node, only one outgoing edge per
            # connector is allowed.
            if outgoing_edges > 1 and isinstance(node,
                                                 (nd.ExitNode, nd.CodeNode)):
                raise InvalidSDFGNodeError(
                    "Connector '%s' cannot have more "
                    "than one outgoing edge, found %d" %
                    (conn, outgoing_edges),
                    sdfg,
                    state_id,
                    nid,
                )

        # Check for edges to nonexistent connectors
        for e in state.in_edges(node):
            if e.dst_conn is not None and e.dst_conn not in node.in_connectors:
                raise InvalidSDFGNodeError(
                    ("Memlet %s leading to " + "nonexistent connector %s") %
                    (str(e.data), e.dst_conn),
                    sdfg,
                    state_id,
                    nid,
                )
        for e in state.out_edges(node):
            if e.src_conn is not None and e.src_conn not in node.out_connectors:
                raise InvalidSDFGNodeError(
                    ("Memlet %s coming from " + "nonexistent connector %s") %
                    (str(e.data), e.src_conn),
                    sdfg,
                    state_id,
                    nid,
                )
        ########################################

    # Memlet checks
    scope = state.scope_dict()
    for eid, e in enumerate(state.edges()):
        # Edge validation
        try:
            e.data.validate(sdfg, state)
        except InvalidSDFGError:
            raise
        except Exception as ex:
            raise InvalidSDFGEdgeError("Edge validation failed: " + str(ex),
                                       sdfg, state_id, eid)

        # For every memlet, obtain its full path in the DFG
        path = state.memlet_path(e)
        src_node = path[0].src
        dst_node = path[-1].dst

        # Check if memlet data matches src or dst nodes
        if (e.data.data is not None
                and (isinstance(src_node, nd.AccessNode)
                     or isinstance(dst_node, nd.AccessNode))
                and (not isinstance(src_node, nd.AccessNode)
                     or e.data.data != src_node.data)
                and (not isinstance(dst_node, nd.AccessNode)
                     or e.data.data != dst_node.data)):
            raise InvalidSDFGEdgeError(
                "Memlet data does not match source or destination "
                "data nodes)",
                sdfg,
                state_id,
                eid,
            )

        # Check memlet subset validity with respect to source/destination nodes
        if e.data.data is not None and e.data.allow_oob == False:
            subset_node = (dst_node if isinstance(dst_node, nd.AccessNode)
                           and e.data.data == dst_node.data else src_node)
            other_subset_node = (
                dst_node if isinstance(dst_node, nd.AccessNode)
                and e.data.data != dst_node.data else src_node)

            if isinstance(subset_node, nd.AccessNode):
                arr = sdfg.arrays[subset_node.data]
                # Dimensionality
                if e.data.subset.dims() != len(arr.shape):
                    raise InvalidSDFGEdgeError(
                        "Memlet subset does not match node dimension "
                        "(expected %d, got %d)" %
                        (len(arr.shape), e.data.subset.dims()),
                        sdfg,
                        state_id,
                        eid,
                    )

                # Bounds
                if any(((minel + off) < 0) == True for minel, off in zip(
                        e.data.subset.min_element(), arr.offset)):
                    raise InvalidSDFGEdgeError(
                        "Memlet subset negative out-of-bounds", sdfg, state_id,
                        eid)
                if any(((maxel + off) >= s) == True for maxel, s, off in zip(
                        e.data.subset.max_element(), arr.shape, arr.offset)):
                    raise InvalidSDFGEdgeError("Memlet subset out-of-bounds",
                                               sdfg, state_id, eid)
            # Test other_subset as well
            if e.data.other_subset is not None and isinstance(
                    other_subset_node, nd.AccessNode):
                arr = sdfg.arrays[other_subset_node.data]
                # Dimensionality
                if e.data.other_subset.dims() != len(arr.shape):
                    raise InvalidSDFGEdgeError(
                        "Memlet other_subset does not match node dimension "
                        "(expected %d, got %d)" %
                        (len(arr.shape), e.data.other_subset.dims()),
                        sdfg,
                        state_id,
                        eid,
                    )

                # Bounds
                if any(((minel + off) < 0) == True for minel, off in zip(
                        e.data.other_subset.min_element(), arr.offset)):
                    raise InvalidSDFGEdgeError(
                        "Memlet other_subset negative out-of-bounds",
                        sdfg,
                        state_id,
                        eid,
                    )
                if any(((maxel + off) >= s) == True for maxel, s, off in zip(
                        e.data.other_subset.max_element(), arr.shape,
                        arr.offset)):
                    raise InvalidSDFGEdgeError(
                        "Memlet other_subset out-of-bounds", sdfg, state_id,
                        eid)

            # Test subset and other_subset for undefined symbols
            if Config.get_bool('experimental', 'validate_undefs'):
                # TODO: Traverse by scopes and accumulate data
                defined_symbols = state.symbols_defined_at(e.dst)
                undefs = (e.data.subset.free_symbols -
                          set(defined_symbols.keys()))
                if len(undefs) > 0:
                    raise InvalidSDFGEdgeError(
                        'Undefined symbols %s found in memlet subset' % undefs,
                        sdfg, state_id, eid)
                if e.data.other_subset is not None:
                    undefs = (e.data.other_subset.free_symbols -
                              set(defined_symbols.keys()))
                    if len(undefs) > 0:
                        raise InvalidSDFGEdgeError(
                            'Undefined symbols %s found in memlet '
                            'other_subset' % undefs, sdfg, state_id, eid)
        #######################################

        # Memlet path scope lifetime checks
        # If scope(src) == scope(dst): OK
        if scope[src_node] == scope[dst_node] or src_node == scope[dst_node]:
            pass
        # If scope(src) contains scope(dst), then src must be a data node,
        # unless the memlet is empty in order to connect to a scope
        elif scope_contains_scope(scope, src_node, dst_node):
            pass
        # If scope(dst) contains scope(src), then dst must be a data node,
        # unless the memlet is empty in order to connect to a scope
        elif scope_contains_scope(scope, dst_node, src_node):
            if not isinstance(dst_node, nd.AccessNode):
                if e.data.is_empty() and isinstance(dst_node, nd.ExitNode):
                    pass
                else:
                    raise InvalidSDFGEdgeError(
                        f"Memlet creates an invalid path (sink node {dst_node}"
                        " should be a data node)", sdfg, state_id, eid)
        # If scope(dst) is disjoint from scope(src), it's an illegal memlet
        else:
            raise InvalidSDFGEdgeError(
                "Illegal memlet between disjoint scopes", sdfg, state_id, eid)

        # Check dimensionality of memory access
        if isinstance(e.data.subset, (sbs.Range, sbs.Indices)):
            if e.data.subset.dims() != len(sdfg.arrays[e.data.data].shape):
                raise InvalidSDFGEdgeError(
                    "Memlet subset uses the wrong dimensions"
                    " (%dD for a %dD data node)" %
                    (e.data.subset.dims(), len(
                        sdfg.arrays[e.data.data].shape)),
                    sdfg,
                    state_id,
                    eid,
                )

        # Verify that source and destination subsets contain the same
        # number of elements
        if not e.data.allow_oob and e.data.other_subset is not None and not (
            (isinstance(src_node, nd.AccessNode)
             and isinstance(sdfg.arrays[src_node.data], dt.Stream)) or
            (isinstance(dst_node, nd.AccessNode)
             and isinstance(sdfg.arrays[dst_node.data], dt.Stream))):
            if (e.data.src_subset.num_elements() *
                    sdfg.arrays[src_node.data].veclen !=
                    e.data.dst_subset.num_elements() *
                    sdfg.arrays[dst_node.data].veclen):
                raise InvalidSDFGEdgeError(
                    'Dimensionality mismatch between src/dst subsets', sdfg,
                    state_id, eid)
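
In practice this check is reached through `sdfg.validate()`; a hedged sketch of calling it directly on every state (import path assumed from this DaCe version):

from dace.sdfg.validation import validate_state, InvalidSDFGError

try:
    for sid, state in enumerate(sdfg.nodes()):
        validate_state(state, state_id=sid, sdfg=sdfg)
except InvalidSDFGError as ex:
    print('SDFG validation failed:', ex)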
Example 5: GPUTransformLocalStorage.apply
    def apply(self, sdfg):
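        # Move the matched map (or reduce node) to a GPU schedule and create
        # GPU-global clones of the CPU arrays it accesses, rewiring the
        # surrounding memlets to go through those clones.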
        graph = sdfg.nodes()[self.state_id]
        if self.expr_index == 0:
            cnode = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._map_entry]]
            node_schedprop = cnode.map
            exit_node = graph.exit_node(cnode)
        else:
            cnode = graph.nodes()[self.subgraph[
                GPUTransformLocalStorage._reduce]]
            node_schedprop = cnode
            exit_node = cnode

        # Change schedule
        node_schedprop._schedule = dtypes.ScheduleType.GPU_Device
        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._maps_transformed += 1
        # If nested graph is designated as sequential, transform schedules and
        # storage from Default to Sequential/Register
        if self.nested_seq and self.expr_index == 0:
            for node in graph.scope_subgraph(cnode).nodes():
                if isinstance(node, nodes.AccessNode):
                    arr = node.desc(sdfg)
                    if arr.storage == dtypes.StorageType.Default:
                        arr.storage = dtypes.StorageType.Register
                elif isinstance(node, nodes.MapEntry):
                    if node.map.schedule == dtypes.ScheduleType.Default:
                        node.map.schedule = dtypes.ScheduleType.Sequential

        gpu_storage_types = [
            dtypes.StorageType.GPU_Global,
            dtypes.StorageType.GPU_Shared,
        ]

        #######################################################
        # Add GPU copies of CPU arrays (i.e., not already on GPU)

        # First, understand which arrays to clone
        all_out_edges = []
        all_out_edges.extend(list(graph.out_edges(exit_node)))
        in_arrays_to_clone = set()
        out_arrays_to_clone = set()
        for e in graph.in_edges(cnode):
            data_node = sd.find_input_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                in_arrays_to_clone.add((data_node, e.data))
        for e in all_out_edges:
            data_node = sd.find_output_arraynode(graph, e)
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                out_arrays_to_clone.add((data_node, e.data))

        if Config.get_bool("debugprint"):
            GPUTransformLocalStorage._arrays_removed += len(
                in_arrays_to_clone) + len(out_arrays_to_clone)

        # Second, create a GPU clone of each array
        # TODO: Overapproximate union of memlets
        cloned_arrays = {}
        in_cloned_arraynodes = {}
        out_cloned_arraynodes = {}
        for array_node, memlet in in_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except:
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except:
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)

            in_cloned_arraynodes[array_node.data] = cloned_node
        for array_node, memlet in out_arrays_to_clone:
            array = array_node.desc(sdfg)
            cloned_name = "gpu_" + array_node.data
            for i, r in enumerate(memlet.bounding_box_size()):
                size = symbolic.overapproximate(r)
                try:
                    if int(size) == 1:
                        suffix = []
                        for c in str(memlet.subset[i][0]):
                            if c.isalpha() or c.isdigit() or c == "_":
                                suffix.append(c)
                            elif c == "+":
                                suffix.append("p")
                            elif c == "-":
                                suffix.append("m")
                            elif c == "*":
                                suffix.append("t")
                            elif c == "/":
                                suffix.append("d")
                        cloned_name += "_" + "".join(suffix)
                except:
                    continue
            if cloned_name in sdfg.arrays.keys():
                cloned_array = sdfg.arrays[cloned_name]
            elif array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                full_shape = []
                for r in memlet.bounding_box_size():
                    size = symbolic.overapproximate(r)
                    try:
                        full_shape.append(int(size))
                    except:
                        full_shape.append(size)
                actual_dims = [
                    idx for idx, r in enumerate(full_shape)
                    if not (isinstance(r, int) and r == 1)
                ]
                if len(actual_dims) == 0:  # abort
                    actual_dims = [len(full_shape) - 1]
                if isinstance(array, data.Scalar):
                    sdfg.add_array(name=cloned_name,
                                   shape=[1],
                                   dtype=array.dtype,
                                   transient=True,
                                   storage=dtypes.StorageType.GPU_Global)
                elif isinstance(array, data.Stream):
                    sdfg.add_stream(
                        name=cloned_name,
                        dtype=array.dtype,
                        shape=[full_shape[d] for d in actual_dims],
                        veclen=array.veclen,
                        buffer_size=array.buffer_size,
                        storage=dtypes.StorageType.GPU_Global,
                        transient=True,
                        offset=[array.offset[d] for d in actual_dims])
                else:
                    sdfg.add_array(
                        name=cloned_name,
                        shape=[full_shape[d] for d in actual_dims],
                        dtype=array.dtype,
                        transient=True,
                        storage=dtypes.StorageType.GPU_Global,
                        allow_conflicts=array.allow_conflicts,
                        strides=[array.strides[d] for d in actual_dims],
                        offset=[array.offset[d] for d in actual_dims],
                    )
                cloned_arrays[array_node.data] = cloned_name
            cloned_node = type(array_node)(cloned_name)
            cloned_node.setzero = True

            out_cloned_arraynodes[array_node.data] = cloned_node

        # Third, connect the cloned arrays to the originals
        for array_name, node in in_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in graph.in_edges(cnode):
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   newmemlet)

                    for e in graph.bfs_edges(edge.dst, reverse=False):
                        parent, _, _child, _, memlet = e
                        if parent != edge.dst and not in_scope(
                                graph, parent, edge.dst):
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[-1].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.ExitNode, forward=True):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   edge.data)
                    graph.remove_edge(edge)

        for array_name, node in out_cloned_arraynodes.items():
            graph.add_node(node)
            is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
            for edge in all_out_edges:
                if edge.data.data == array_name:
                    newmemlet = copy.deepcopy(edge.data)
                    newmemlet.data = node.data

                    if is_scalar:
                        newmemlet.subset = sbs.Indices([0])
                    else:
                        offset = []
                        lost_dims = []
                        lost_ranges = []
                        newsubset = [None] * len(edge.data.subset)
                        for ind, r in enumerate(edge.data.subset):
                            offset.append(r[0])
                            if isinstance(edge.data.subset[ind], tuple):
                                begin = edge.data.subset[ind][0] - r[0]
                                end = edge.data.subset[ind][1] - r[0]
                                step = edge.data.subset[ind][2]
                                if begin == end:
                                    lost_dims.append(ind)
                                    lost_ranges.append((begin, end, step))
                                else:
                                    newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] -= r[0]
                        if len(lost_dims) == len(edge.data.subset):
                            lost_dims.pop()
                            newmemlet.subset = type(
                                edge.data.subset)([lost_ranges[-1]])
                        else:
                            newmemlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])

                    graph.add_edge(edge.src, edge.src_conn, node, None,
                                   newmemlet)

                    end_node = graph.scope_dict()[edge.src]
                    for e in graph.bfs_edges(edge.src, reverse=True):
                        parent, _, _child, _, memlet = e
                        if parent == end_node:
                            break
                        if memlet.data != edge.data.data:
                            continue
                        path = graph.memlet_path(e)
                        if not isinstance(path[0].dst, nodes.CodeNode):
                            if in_path(path, e, nodes.EntryNode,
                                       forward=False):
                                if isinstance(parent, nodes.CodeNode):
                                    # Output edge
                                    break
                                else:
                                    continue
                        if is_scalar:
                            memlet.subset = sbs.Indices([0])
                        else:
                            newsubset = [None] * len(memlet.subset)
                            for ind, r in enumerate(memlet.subset):
                                if ind in lost_dims:
                                    continue
                                if isinstance(memlet.subset[ind], tuple):
                                    begin = r[0] - offset[ind]
                                    end = r[1] - offset[ind]
                                    step = r[2]
                                    newsubset[ind] = (begin, end, step)
                                else:
                                    newsubset[ind] = (
                                        r - offset[ind],
                                        r - offset[ind],
                                        1,
                                    )
                            memlet.subset = type(edge.data.subset)(
                                [r for r in newsubset if r is not None])
                        memlet.data = node.data

                    edge.data.wcr = None
                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = newmemlet.subset
                    graph.add_edge(node, None, edge.dst, edge.dst_conn,
                                   edge.data)
                    graph.remove_edge(edge)

        # Fourth, replace memlet arrays as necessary
        if self.expr_index == 0:
            scope_subgraph = graph.scope_subgraph(cnode)
            for edge in scope_subgraph.edges():
                if edge.data.data is not None and edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]
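
The loops above normalize each rerouted memlet by subtracting the start offset of its original range and dropping dimensions that collapse to a single index. A minimal standalone sketch of that normalization, using plain (begin, end, step) tuples as a stand-in for dace subset objects:

# Sketch of the offset/lost-dimension normalization used above
# (plain (begin, end, step) tuples stand in for dace subset ranges).
def normalize_subset(ranges):
    offset = [r[0] for r in ranges]
    newsubset = []
    lost_dims = []
    for ind, (b, e, s) in enumerate(ranges):
        begin, end = b - offset[ind], e - offset[ind]
        if begin == end:            # dimension collapses to a single index
            lost_dims.append(ind)
        else:
            newsubset.append((begin, end, s))
    # Keep at least one dimension, as the code above does with lost_ranges
    if not newsubset:
        newsubset = [(0, 0, ranges[-1][2])]
    return newsubset, offset, lost_dims

# Example: a 2D range whose second dimension is a single element
print(normalize_subset([(4, 7, 1), (2, 2, 1)]))   # ([(0, 3, 1)], [4, 2], [1])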
Example no. 6
def compileProgram(request, language, perfopts=None):
    if not request.json or (('code' not in request.json) and
                            ('sdfg' not in request.json)):
        print("[Error] No input code provided, cannot continue")
        abort(400)

    errors = []
    try:
        optpath = request.json['optpath']
    except KeyError:
        optpath = None

    try:
        sdfg_props = request.json['sdfg_props']
    except KeyError:
        sdfg_props = None

    if perfopts is None:
        try:
            perf_mode = request.json['perf_mode']
        except KeyError:
            perf_mode = None
    else:
        #print("Perfopts: " + str(perfopts))
        perf_mode = perfopts

    client_id = request.json['client_id']

    sdfg_dict = {}
    sdfg_eval_order = []

    with config_lock:  # Lock the config - the config may be modified while holding this lock, but the config MUST be restored.

        from dace.config import Config
        config_path = "./client_configs/" + client_id + ".conf"
        if os.path.isfile(config_path):
            Config.load(config_path)
        else:
            Config.load()

        dace_state = None
        in_sdfg = None
        if "sdfg" in request.json:
            in_sdfg = request.json['sdfg']
            if isinstance(in_sdfg, list):
                if len(in_sdfg) > 1:
                    # TODO: Allow multiple sdfg inputs
                    raise NotImplementedError("More than 1 SDFG provided")

                in_sdfg = in_sdfg[0]

            if isinstance(in_sdfg, str):
                in_sdfg = json.loads(in_sdfg)

            if isinstance(in_sdfg, dict):
                # Generate callbacks (needed for elements referencing others)
                def loader_callback(name: str):
                    # Check if already available and if yes, return it
                    if name in sdfg_dict:
                        return sdfg_dict[name]

                    # Else: This function has to recreate the given sdfg
                    sdfg_dict[name] = dace.SDFG.from_json(
                        in_sdfg[name], {
                            'sdfg': None,
                            'callback': loader_callback
                        })
                    sdfg_eval_order.append(name)
                    return sdfg_dict[name]

                for k, v in in_sdfg.items():
                    # Leave it be if the sdfg was already created
                    # (this might happen with SDFG references)
                    if k in sdfg_dict: continue
                    if isinstance(v, str):
                        v = json.loads(v)
                    sdfg_dict[k] = dace.SDFG.from_json(
                        v, {
                            'sdfg': None,
                            'callback': loader_callback
                        })
                    sdfg_eval_order.append(k)
            else:
                in_sdfg = dace.SDFG.from_json(in_sdfg)
                sdfg_dict[in_sdfg.name] = in_sdfg
        else:
            print("Using code to compile")
            code = request.json['code']
            if isinstance(code, list):
                if len(code) > 1:
                    print("More than 1 code file provided!")
                    abort(400)
                code = code[0]
            if language == "octave":
                statements = octave_frontend.parse(code, debug=False)
                statements.provide_parents()
                statements.specialize()
                sdfg = statements.generate_code()
                sdfg.set_sourcecode(code, "matlab")
            elif language == "dace":
                dace_state = create_DaceState(code, sdfg_dict, errors)

        # The DaceState uses the variable names from the DaCe code; re-key the dictionary by SDFG name instead
        copied_dict = {}
        for k, v in sdfg_dict.items():
            copied_dict[v.name] = v
        sdfg_dict = copied_dict

        if len(errors) == 0:
            if optpath is not None:
                for sdfg_name, op in optpath.items():
                    try:
                        sp = sdfg_props[sdfg_name]
                    except Exception:
                        # In any error case, just ignore the properties
                        sp = None
                    print("Applying opts for " + sdfg_name)
                    print("Dict: " + str(sdfg_dict.keys()))
                    sdfg_dict[sdfg_name] = applyOptPath(sdfg_dict[sdfg_name],
                                                        op,
                                                        sdfg_props=sp)

        code_tuple_dict = {}
        # Deep-copy the SDFG (codegen may change the SDFG it operates on)
        codegen_sdfgs = copy.deepcopy(sdfg_dict)
        codegen_sdfgs_dace_state = copy.deepcopy(sdfg_dict)
        if len(errors) == 0:
            if sdfg_eval_order:
                sdfg_eval = [(n, codegen_sdfgs[n])
                             for n in reversed(sdfg_eval_order)]
            else:
                sdfg_eval = codegen_sdfgs.items()

            for n, s in sdfg_eval:
                try:
                    if Config.get_bool('diode', 'general',
                                       'library_autoexpand'):
                        s.expand_library_nodes()

                    code_tuple_dict[n] = codegen.generate_code(s)
                except dace.sdfg.NodeNotExpandedError as ex:
                    code_tuple_dict[n] = [str(ex)]
                except Exception:  # Forward exception to output code
                    code_tuple_dict[n] = [
                        'Code generation failed:\n' + traceback.format_exc()
                    ]

        if dace_state is None:
            if "code" in request.json:
                in_code = request.json['code']
            else:
                in_code = ""
            dace_state = DaceState(in_code, "tmp.py", remote=remote_execution)
            dace_state.set_sdfg(
                list(codegen_sdfgs_dace_state.values())[0],
                list(codegen_sdfgs_dace_state.keys())[0])
            if len(dace_state.errors) > 0:
                print("ERRORS: " + str(dace_state.errors))
                errors.extend(dace_state.errors)

        # The config won't save back on its own, and we don't want it to - these changes are transient

        if len(errors) > 0:
            return errors

        # Only return top-level SDFG
        return ({k: v
                 for k, v in sdfg_dict.items()
                 if v.parent is None}, code_tuple_dict, dace_state)
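
For reference, compileProgram reads only a handful of keys from the incoming request JSON. A hypothetical payload that would pass its checks (the field names mirror the reads above; the values are placeholders, not taken from the source) could look like this:

# Hypothetical request payload for compileProgram; keys mirror the reads above.
example_request_json = {
    "client_id": "client-0",          # required
    "code": ["import dace\n..."],     # either "code" or "sdfg" must be present
    "optpath": None,                  # optional: per-SDFG optimization path
    "sdfg_props": None,               # optional: properties for applied passes
    "perf_mode": None,                # optional: overridden when perfopts is given
}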
Example no. 7
def preprocess_dace_program(
    f: Callable[..., Any],
    argtypes: Dict[str, data.Data],
    global_vars: Dict[str, Any],
    modules: Dict[str, Any],
    resolve_functions: bool = False,
    parent_closure: Optional[SDFGClosure] = None
) -> Tuple[PreprocessedAST, SDFGClosure]:
    """
    Preprocesses a ``@dace.program`` and all its nested functions, returning
    a preprocessed AST object and the closure of the resulting SDFG.
    :param f: A Python function to parse.
    :param argtypes: A dictionary of (name, type) for the given
                        function's arguments, which may pertain to data
                        nodes or symbols (scalars).
    :param global_vars: A dictionary of global variables in the closure
                        of `f`.
    :param modules: A dictionary from an imported module name to the
                    module itself.
    :param resolve_functions: If True, treats all global functions defined
                                outside of the program as returning constant
                                values.
    :param parent_closure: If not None, represents the closure of the parent of
                           the currently processed function.
    :return: A 2-tuple of the AST and its reduced (used) closure.
    """
    src_ast, src_file, src_line, src = astutils.function_to_ast(f)

    # Resolve data structures
    src_ast = StructTransformer(global_vars).visit(src_ast)

    src_ast = ModuleResolver(modules).visit(src_ast)
    # Convert modules after resolution
    for mod, modval in modules.items():
        if mod == 'builtins':
            continue
        newmod = global_vars[mod]
        #del global_vars[mod]
        global_vars[modval] = newmod

    # Resolve constants to their values (if they are not already defined in this scope)
    # and symbols to their names
    resolved = {
        k: v
        for k, v in global_vars.items() if k not in argtypes and k != '_'
    }
    closure_resolver = GlobalResolver(resolved, resolve_functions)

    # Append element to call stack and handle max recursion depth
    if parent_closure is not None:
        fid = id(f)
        if fid in parent_closure.callstack:
            raise DaceRecursionError(fid)
        if len(parent_closure.callstack) > Config.get(
                'frontend', 'implicit_recursion_depth'):
            raise TypeError(
                'Implicit (automatically parsed) recursion depth '
                'exceeded. Functions below this call will not be '
                'parsed. To change this setting, modify the value '
                '`frontend.implicit_recursion_depth` in .dace.conf')

        closure_resolver.closure.callstack = parent_closure.callstack + [fid]

    passes = int(Config.get('frontend', 'preprocessing_passes'))
    if passes >= 0:
        gen = range(passes)
    else:  # Run until the code stops changing

        def check_code(src_ast):
            old_src = ast.dump(src_ast)
            i = 0
            while True:
                yield i
                new_src = ast.dump(src_ast)
                if new_src == old_src:
                    return
                old_src = new_src
                i += 1

        gen = check_code(src_ast)

    for pass_num in gen:
        try:
            src_ast = closure_resolver.visit(src_ast)
            src_ast = LoopUnroller(resolved, src_file).visit(src_ast)
            src_ast = ConditionalCodeResolver(resolved).visit(src_ast)
            src_ast = DeadCodeEliminator().visit(src_ast)
        except Exception:
            if Config.get_bool('frontend', 'verbose_errors'):
                print(
                    f'VERBOSE: Failed to preprocess (pass #{pass_num}) the following program:'
                )
                print(astutils.unparse(src_ast))
            raise

    try:
        ctr = CallTreeResolver(closure_resolver.closure, resolved)
        ctr.visit(src_ast)
    except DaceRecursionError as ex:
        if id(f) == ex.fid:
            raise TypeError(
                'Parsing failed due to recursion in a data-centric '
                'context called from this function')
        else:
            raise ex
    used_arrays = ArrayClosureResolver(closure_resolver.closure)
    used_arrays.visit(src_ast)

    # Filter out arrays that are not used after dead code elimination
    closure_resolver.closure.closure_arrays = {
        k: v
        for k, v in closure_resolver.closure.closure_arrays.items()
        if k in used_arrays.arrays
    }

    # Filter out callbacks that were removed after dead code elimination
    closure_resolver.closure.callbacks = {
        k: v
        for k, v in closure_resolver.closure.callbacks.items()
        if k in ctr.seen_calls
    }

    # Filter remaining global variables according to type and scoping rules
    program_globals = {
        k: v
        for k, v in global_vars.items() if k not in argtypes
    }

    # Fill in data descriptors from closure arrays
    argtypes.update({
        arrname: v[1]
        for arrname, v in closure_resolver.closure.closure_arrays.items()
    })

    # Combine nested closures with the current one
    closure_resolver.closure.combine_nested_closures()

    past = PreprocessedAST(src_file, src_line, src, src_ast, program_globals)

    return past, closure_resolver.closure
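
The check_code generator above is a small fixpoint driver: it keeps yielding pass numbers until ast.dump of the tree stops changing. A self-contained sketch of the same idea, where run_until_stable and the FoldTrueIf transformer are stand-ins for the real preprocessing visitors:

import ast

def run_until_stable(tree, transform):
    """Apply `transform` to an AST until its dump no longer changes."""
    old = ast.dump(tree)
    passes = 0
    while True:
        tree = transform(tree)
        new = ast.dump(tree)
        passes += 1
        if new == old:
            return tree, passes
        old = new

# Stand-in pass: fold `if True:` blocks into their bodies.
class FoldTrueIf(ast.NodeTransformer):
    def visit_If(self, node):
        self.generic_visit(node)
        if isinstance(node.test, ast.Constant) and node.test.value is True:
            return node.body
        return node

tree = ast.parse("if True:\n    x = 1\n")
tree, n = run_until_stable(
    tree, lambda t: ast.fix_missing_locations(FoldTrueIf().visit(t)))
print(n, ast.dump(tree))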
Example no. 8
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param output_stream: Additional output stream to write to (used for
                              DIODE client).
        :return: Path to the compiled shared library file.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Prepare performance report folder
    os.makedirs(os.path.join(program_folder, "perf"), exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    file_list = [
        line.strip().split(",")
        for line in open(os.path.join(program_folder, "dace_files.csv"), "r")
    ]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, target_type, file_name in file_list:
        if target_type:
            path = os.path.join(target_name, target_type, file_name)
        else:
            path = os.path.join(target_name, file_name)
        files.append(path)
        targets[target_name] = next(
            k for k, v in TargetCodeGenerator.extensions().items()
            if v['name'] == target_name)

    # Windows-only workaround: Override Visual C++'s linker to use
    # Multi-Threaded (MT) mode. This fixes linkage in CUDA applications where
    # CMake fails to do so.
    if os.name == 'nt':
        if '_CL_' not in os.environ:
            os.environ['_CL_'] = '/MT'
        elif '/MT' not in os.environ['_CL_']:
            os.environ['_CL_'] = os.environ['_CL_'] + ' /MT'

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_SRC_DIR=\"{}\"".format(src_folder),
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Get required environments and retrieve the CMake information
    environments = set(l.strip() for l in open(
        os.path.join(program_folder, "dace_environments.csv"), "r"))
    cmake_minimum_version = [0]
    cmake_variables = dict()
    cmake_packages = set()
    cmake_includes = set()
    cmake_libraries = set()
    cmake_compile_flags = set()
    cmake_link_flags = set()
    cmake_files = set()
    cmake_module_paths = set()
    for env_name in environments:
        env = dace.library.get_environment(env_name)
        if (env.cmake_minimum_version is not None
                and len(env.cmake_minimum_version) > 0):
            version_list = list(map(int, env.cmake_minimum_version.split(".")))
            for i in range(max(len(version_list), len(cmake_minimum_version))):
                if i >= len(version_list):
                    break
                if i >= len(cmake_minimum_version):
                    cmake_minimum_version = version_list
                    break
                if version_list[i] > cmake_minimum_version[i]:
                    cmake_minimum_version = version_list
                    break
                # Otherwise keep iterating
        for var in env.cmake_variables:
            if (var in cmake_variables
                    and cmake_variables[var] != env.cmake_variables[var]):
                raise KeyError(
                    "CMake variable {} was redefined from {} to {}.".format(
                        var, cmake_variables[var], env.cmake_variables[var]))
            cmake_variables[var] = env.cmake_variables[var]
        cmake_packages |= set(env.cmake_packages)
        cmake_includes |= set(env.cmake_includes)
        cmake_libraries |= set(env.cmake_libraries)
        cmake_compile_flags |= set(env.cmake_compile_flags)
        cmake_link_flags |= set(env.cmake_link_flags)
        # Make path absolute
        env_dir = os.path.dirname(env._dace_file_path)
        cmake_files |= set(
            (f if os.path.isabs(f) else os.path.join(env_dir, f)) +
            (".cmake" if not f.endswith(".cmake") else "")
            for f in env.cmake_files)
        for header in env.headers:
            if os.path.isabs(header):
                # Giving an absolute path is not good practice, but allow it
                # for emergency overriding
                cmake_includes.add(os.path.dirname(header))
            abs_path = os.path.join(env_dir, header)
            if os.path.isfile(abs_path):
                # Allow includes stored with the library, specified with a
                # relative path
                cmake_includes.add(env_dir)
                break
    environment_flags = [
        "-DDACE_ENV_MINIMUM_VERSION={}".format(".".join(
            map(str, cmake_minimum_version))),
        # Make CMake list of key-value pairs
        "-DDACE_ENV_VAR_KEYS=\"{}\"".format(";".join(cmake_variables.keys())),
        "-DDACE_ENV_VAR_VALUES=\"{}\"".format(";".join(
            cmake_variables.values())),
        "-DDACE_ENV_PACKAGES=\"{}\"".format(" ".join(cmake_packages)),
        "-DDACE_ENV_INCLUDES=\"{}\"".format(" ".join(cmake_includes)),
        "-DDACE_ENV_LIBRARIES=\"{}\"".format(" ".join(cmake_libraries)),
        "-DDACE_ENV_COMPILE_FLAGS=\"{}\"".format(
            " ".join(cmake_compile_flags)),
        # "-DDACE_ENV_LINK_FLAGS=\"{}\"".format(" ".join(cmake_link_flags)),
        "-DDACE_ENV_CMAKE_FILES=\"{}\"".format(";".join(cmake_files)),
    ]
    # Escape variable expansions to defer their evaluation
    environment_flags = [
        cmd.replace("$", "_DACE_CMAKE_EXPAND") for cmd in environment_flags
    ]
    cmake_command += environment_flags

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        try:
            cmake_command += target.cmake_options()
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass
        except ValueError as ex:  # Cannot find compiler executable
            raise CompilerConfigurationError(str(ex))

    cmake_command.append("-DDACE_LIBS=\"{}\"".format(" ".join(libraries)))

    # Override linker and linker arguments
    if Config.get('compiler', 'linker', 'executable'):
        cmake_command.append("-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))))
    if Config.get('compiler', 'linker', 'args'):
        cmake_command.append(
            "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(
                Config.get('compiler', 'linker', 'args') + " " +
                " ".join(cmake_link_flags)), )
    cmake_command = ' '.join(cmake_command)

    cmake_filename = os.path.join(build_folder, 'cmake_configure.sh')
    ##############################################
    # Configure
    try:
        _run_liveoutput(cmake_command,
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise CompilerConfigurationError('Configuration failure')
            else:
                raise CompilerConfigurationError('Configuration failure:\n' +
                                                 ex.output)

        with open(cmake_filename, "w") as fp:
            fp.write(cmake_command)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise CompilationError('Compiler failure')
        else:
            raise CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
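
The minimum-version loop above keeps the componentwise-largest dotted CMake version across all environments. A small standalone variant of that comparison (with an explicit early break added here when a candidate component is smaller, which the loop above leaves implicit):

def merge_cmake_minimum_versions(versions):
    """Return the componentwise-largest dotted version string."""
    minimum = [0]
    for ver in versions:
        parts = list(map(int, ver.split(".")))
        for i in range(max(len(parts), len(minimum))):
            if i >= len(parts):
                break                       # current minimum is more specific
            if i >= len(minimum) or parts[i] > minimum[i]:
                minimum = parts             # candidate wins
                break
            if parts[i] < minimum[i]:
                break                       # current minimum wins
            # Equal components: keep iterating
    return ".".join(map(str, minimum))

print(merge_cmake_minimum_versions(["3.10", "3.4.3", "3.12"]))  # 3.12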
Example no. 9
    def apply(self, sdfg):
        first_state = sdfg.nodes()[self.subgraph[StateFusion._first_state]]
        second_state = sdfg.nodes()[self.subgraph[StateFusion._second_state]]

        # Remove interstate edge(s)
        edges = sdfg.edges_between(first_state, second_state)
        for edge in edges:
            if edge.data.assignments:
                for src, dst, other_data in sdfg.in_edges(first_state):
                    other_data.assignments.update(edge.data.assignments)
            sdfg.remove_edge(edge)

        # Special case 1: first state is empty
        if first_state.is_empty():
            sdutil.change_edge_dest(sdfg, first_state, second_state)
            sdfg.remove_node(first_state)
            return

        # Special case 2: second state is empty
        if second_state.is_empty():
            sdutil.change_edge_src(sdfg, second_state, first_state)
            sdutil.change_edge_dest(sdfg, second_state, first_state)
            sdfg.remove_node(second_state)
            return

        # Normal case: both states are not empty

        # Find source/sink (data) nodes
        first_input = [
            node for node in sdutil.find_source_nodes(first_state)
            if isinstance(node, nodes.AccessNode)
        ]
        first_output = [
            node for node in sdutil.find_sink_nodes(first_state)
            if isinstance(node, nodes.AccessNode)
        ]
        second_input = [
            node for node in sdutil.find_source_nodes(second_state)
            if isinstance(node, nodes.AccessNode)
        ]

        # first input = first input - first output
        first_input = [
            node for node in first_input
            if next((x for x in first_output
                     if x.label == node.label), None) is None
        ]

        # Merge second state to first state
        # First keep a backup of the topological sorted order of the nodes
        order = [
            x for x in reversed(list(nx.topological_sort(first_state._nx)))
            if isinstance(x, nodes.AccessNode)
        ]
        for node in second_state.nodes():
            first_state.add_node(node)
        for src, src_conn, dst, dst_conn, data in second_state.edges():
            first_state.add_edge(src, src_conn, dst, dst_conn, data)

        # Merge common (data) nodes
        for node in second_input:
            if first_state.in_degree(node) == 0:
                n = next((x for x in order if x.label == node.label), None)
                if n:
                    sdutil.change_edge_src(first_state, node, n)
                    first_state.remove_node(node)
                    n.access = dtypes.AccessType.ReadWrite

        # Redirect edges and remove second state
        sdutil.change_edge_src(sdfg, second_state, first_state)
        sdfg.remove_node(second_state)
        if Config.get_bool("debugprint"):
            StateFusion._states_fused += 1
Example no. 10
def generate_code(sdfg) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.
        :param sdfg: The SDFG to use
        :return: List of code objects that correspond to files to compile.
    """
    # Before compiling, validate SDFG correctness
    sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        from dace.sdfg import SDFG
        import filecmp
        import shutil
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            sdfg.save(f'{tmp_dir}/test.sdfg')
            sdfg2 = SDFG.from_file(f'{tmp_dir}/test.sdfg')
            sdfg2.save(f'{tmp_dir}/test2.sdfg')
            print('Testing SDFG serialization...')
            if not filecmp.cmp(f'{tmp_dir}/test.sdfg',
                               f'{tmp_dir}/test2.sdfg'):
                shutil.move(f"{tmp_dir}/test.sdfg", "test.sdfg")
                shutil.move(f"{tmp_dir}/test2.sdfg", "test2.sdfg")
                raise RuntimeError(
                    'SDFG serialization failed - files do not match')

        # Run with the deserialized version
        # NOTE: This means that all subsequent modifications to `sdfg`
        # are not reflected outside of this function (e.g., library
        # node expansion).
        sdfg = sdfg2

    # Before generating the code, run type inference on the SDFG connectors
    infer_types.infer_connector_types(sdfg)

    # Set default storage/schedule types in SDFG
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    # Recursively expand library nodes that have not yet been expanded
    sdfg.expand_library_nodes()

    # After expansion, run another pass of connector/type inference
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    frame = framecode.DaCeCodeGenerator()

    # Instantiate CPU first (as it is used by the other code generators)
    # TODO: Refactor the parts used by other code generators out of CPU
    default_target = cpu.CPUCodeGen
    for k, v in target.TargetCodeGenerator.extensions().items():
        # If another target has already been registered as CPU, use it instead
        if v['name'] == 'cpu':
            default_target = k
    targets = {'cpu': default_target(frame, sdfg)}

    # Instantiate the rest of the targets
    targets.update({
        v['name']: k(frame, sdfg)
        for k, v in target.TargetCodeGenerator.extensions().items()
        if v['name'] not in targets
    })

    # Instantiate all instrumentation providers in SDFG
    provider_mapping = InstrumentationProvider.get_provider_mapping()
    frame._dispatcher.instrumentation[
        dtypes.InstrumentationType.No_Instrumentation] = None
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            frame._dispatcher.instrumentation[node.instrument] = \
                provider_mapping[node.instrument]
        elif hasattr(node, 'consume'):
            frame._dispatcher.instrumentation[node.consume.instrument] = \
                provider_mapping[node.consume.instrument]
        elif hasattr(node, 'map'):
            frame._dispatcher.instrumentation[node.map.instrument] = \
                provider_mapping[node.map.instrument]
    if sdfg.instrument != dtypes.InstrumentationType.No_Instrumentation:
        frame._dispatcher.instrumentation[sdfg.instrument] = \
            provider_mapping[sdfg.instrument]
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name,
                   global_code + frame_code,
                   'cpp',
                   cpu.CPUCodeGen,
                   'Frame',
                   environments=used_environments)
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # add a header file for calling the SDFG
    dummy = CodeObject(sdfg.name,
                       generate_headers(sdfg),
                       'h',
                       cpu.CPUCodeGen,
                       'CallHeader',
                       target_type='../../include',
                       linkable=False)
    target_objects.append(dummy)

    # add a dummy main function to show how to call the SDFG
    dummy = CodeObject(sdfg.name + "_main",
                       generate_dummy(sdfg),
                       'cpp',
                       cpu.CPUCodeGen,
                       'SampleMain',
                       target_type='../../sample',
                       linkable=False)
    target_objects.append(dummy)

    return target_objects
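
The serialization self-test at the top of generate_code can also be exercised on its own. A minimal sketch, assuming an existing `sdfg` object, using the same round trip shown above (save, reload, re-save, byte-compare):

import filecmp
import tempfile
from dace.sdfg import SDFG

def check_serialization_roundtrip(sdfg):
    """Save, reload and re-save an SDFG, then compare the two files."""
    with tempfile.TemporaryDirectory() as tmp:
        first = f'{tmp}/first.sdfg'
        second = f'{tmp}/second.sdfg'
        sdfg.save(first)
        SDFG.from_file(first).save(second)
        if not filecmp.cmp(first, second, shallow=False):
            raise RuntimeError('SDFG serialization failed - files do not match')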
Example no. 11
    def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
        """ Main function that controls argument construction for calling
            the C prototype of the SDFG.

            Organizes arguments first by `sdfg.arglist`, then data descriptors
            by alphabetical order, then symbols by alphabetical order.
        """
        # Return value initialization (for values that have not been given)
        self._initialize_return_values(kwargs)
        if self._return_arrays is not None:
            if len(self._retarray_shapes) == 1:
                kwargs[self._retarray_shapes[0][0]] = self._return_arrays
            else:
                for desc, arr in zip(self._retarray_shapes,
                                     self._return_arrays):
                    kwargs[desc[0]] = arr

        # Argument construction
        sig = self._sig
        typedict = self._typedict
        if len(kwargs) > 0:
            # Construct mapping from arguments to signature
            arglist = []
            argtypes = []
            argnames = []
            for a in sig:
                try:
                    arglist.append(kwargs[a])
                    argtypes.append(typedict[a])
                    argnames.append(a)
                except KeyError:
                    raise KeyError("Missing program argument \"{}\"".format(a))
        else:
            arglist = []
            argtypes = []
            argnames = []
            sig = []
        # Type checking
        for a, arg, atype in zip(argnames, arglist, argtypes):
            if not dtypes.is_array(arg) and isinstance(atype, dt.Array):
                if isinstance(arg, list):
                    print('WARNING: Casting list argument "%s" to ndarray' % a)
                elif arg is None:
                    # None values are passed as null pointers
                    pass
                else:
                    raise TypeError(
                        'Passing an object (type %s) to an array in argument "%s"'
                        % (type(arg).__name__, a))
            elif dtypes.is_array(arg) and not isinstance(atype, dt.Array):
                # GPU scalars are pointers, so this is fine
                if atype.storage != dtypes.StorageType.GPU_Global:
                    raise TypeError(
                        'Passing an array to a scalar (type %s) in argument "%s"'
                        % (atype.dtype.ctype, a))
            elif not isinstance(atype, dt.Array) and not isinstance(
                    atype.dtype, dtypes.callback) and not isinstance(
                        arg,
                        (atype.dtype.type,
                         sp.Basic)) and not (isinstance(arg, symbolic.symbol)
                                             and arg.dtype == atype.dtype):
                if isinstance(arg, int) and atype.dtype.type == np.int64:
                    pass
                elif isinstance(arg, float) and atype.dtype.type == np.float64:
                    pass
                elif (isinstance(arg, int) and atype.dtype.type == np.int32
                      and abs(arg) <= (1 << 31) - 1):
                    pass
                elif (isinstance(arg, int) and atype.dtype.type == np.uint32
                      and arg >= 0 and arg <= (1 << 32) - 1):
                    pass
                else:
                    print(
                        'WARNING: Casting scalar argument "%s" from %s to %s' %
                        (a, type(arg).__name__, atype.dtype.type))
            elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray)
                  and atype.dtype.as_numpy_dtype() != arg.dtype):
                # Make exception for vector types
                if (isinstance(atype.dtype, dtypes.vector)
                        and atype.dtype.vtype.as_numpy_dtype() == arg.dtype):
                    pass
                else:
                    print(
                        'WARNING: Passing %s array argument "%s" to a %s array'
                        % (arg.dtype, a, atype.dtype.type.__name__))
            elif (isinstance(atype, dt.Array) and isinstance(arg, np.ndarray)
                  and arg.base is not None and '__return' not in a
                  and not Config.get_bool('compiler', 'allow_view_arguments')):
                raise TypeError(
                    'Passing a numpy view (e.g., sub-array or "A.T") to DaCe '
                    'programs is not allowed in order to retain analyzability. '
                    'Please make a copy with "numpy.copy(...)". If you know what '
                    'you are doing, you can override this error in the '
                    'configuration by setting compiler.allow_view_arguments '
                    'to True.')

        # Explicit casting
        for index, (arg, argtype) in enumerate(zip(arglist, argtypes)):
            # Call a wrapper function to make NumPy arrays from pointers.
            if isinstance(argtype.dtype, dtypes.callback):
                arglist[index] = argtype.dtype.get_trampoline(arg, kwargs)
            # List to array
            elif isinstance(arg, list) and isinstance(argtype, dt.Array):
                arglist[index] = np.array(arg, dtype=argtype.dtype.type)
            # Null pointer
            elif arg is None and isinstance(argtype, dt.Array):
                arglist[index] = ctypes.c_void_p(0)

        # Retain only the element datatype for upcoming checks and casts
        arg_ctypes = [t.dtype.as_ctypes() for t in argtypes]

        sdfg = self._sdfg

        # Obtain SDFG constants
        constants = sdfg.constants

        # Remove symbolic constants from arguments
        callparams = tuple(
            (arg, actype, atype)
            for arg, actype, atype in zip(arglist, arg_ctypes, argtypes)
            if not symbolic.issymbolic(arg) or (
                hasattr(arg, 'name') and arg.name not in constants))

        # Replace symbols with their values
        callparams = tuple(
            (actype(arg.get()), actype,
             atype) if isinstance(arg, symbolic.symbol) else (arg, actype,
                                                              atype)
            for arg, actype, atype in callparams)

        # Replace arrays with their base host/device pointers
        newargs = tuple(
            (ctypes.c_void_p(_array_interface_ptr(arg, atype)), actype,
             atype) if dtypes.is_array(arg) else (arg, actype, atype)
            for arg, actype, atype in callparams)

        initargs = tuple(atup for atup in callparams
                         if not dtypes.is_array(atup[0]))

        newargs = tuple(
            actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg
            for arg, actype, atype in newargs)

        initargs = tuple(
            actype(arg) if (not isinstance(arg, ctypes._SimpleCData)) else arg
            for arg, actype, atype in initargs)

        self._lastargs = newargs, initargs
        return self._lastargs
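
The final wrapping step above passes arrays as raw pointers and everything else through its ctypes constructor. A stripped-down sketch of that idea for plain NumPy arrays and scalars, independent of DaCe's `_array_interface_ptr` helper:

import ctypes
import numpy as np

def to_ctypes_args(args):
    """Wrap NumPy arrays as void pointers and scalars as ctypes values."""
    wrapped = []
    for arg in args:
        if isinstance(arg, np.ndarray):
            # Base host pointer of the array data
            wrapped.append(ctypes.c_void_p(arg.ctypes.data))
        elif isinstance(arg, float):
            wrapped.append(ctypes.c_double(arg))
        else:
            wrapped.append(ctypes.c_int64(int(arg)))
    return tuple(wrapped)

print(to_ctypes_args([np.zeros(4), 3, 2.5]))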
Example no. 12
    def apply(self, sdfg: sd.SDFG):

        #######################################################
        # Step 0: SDFG metadata

        # Find all input and output data descriptors
        input_nodes = []
        output_nodes = []
        global_code_nodes = [[] for _ in sdfg.nodes()]

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and not node.desc(sdfg).transient):
                    if (state.out_degree(node) > 0
                            and node.data not in input_nodes):
                        input_nodes.append((node.data, node.desc(sdfg)))
                    if (state.in_degree(node) > 0
                            and node.data not in output_nodes):
                        output_nodes.append((node.data, node.desc(sdfg)))
                elif isinstance(node, nodes.CodeNode) and sdict[node] is None:
                    if not isinstance(node, nodes.EmptyTasklet):
                        global_code_nodes[i].append(node)

            # Input nodes may also be nodes with WCR memlets and no identity
            for e in state.edges():
                if e.data.wcr is not None and e.data.wcr_identity is None:
                    if (e.data.data not in input_nodes
                            and not sdfg.arrays[e.data.data].transient):
                        input_nodes.append((e.data.data,
                                            sdfg.arrays[e.data.data]))

        start_state = sdfg.start_state
        end_states = sdfg.sink_nodes()

        #######################################################
        # Step 1: Create cloned GPU arrays and replace originals

        cloned_arrays = {}
        for inodename, inode in set(input_nodes):
            newdesc = inode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + inodename, newdesc)
            cloned_arrays[inodename] = 'gpu_' + inodename

        for onodename, onode in set(output_nodes):
            if onodename in cloned_arrays:
                continue
            newdesc = onode.clone()
            newdesc.storage = dtypes.StorageType.GPU_Global
            newdesc.transient = True
            sdfg.add_datadesc('gpu_' + onodename, newdesc)
            cloned_arrays[onodename] = 'gpu_' + onodename

        # Replace nodes
        for state in sdfg.nodes():
            for node in state.nodes():
                if (isinstance(node, nodes.AccessNode)
                        and node.data in cloned_arrays):
                    node.data = cloned_arrays[node.data]

        # Replace memlets
        for state in sdfg.nodes():
            for edge in state.edges():
                if edge.data.data in cloned_arrays:
                    edge.data.data = cloned_arrays[edge.data.data]

        #######################################################
        # Step 2: Create copy-in state
        excluded_copyin = self.exclude_copyin.split(',')

        copyin_state = sdfg.add_state(sdfg.label + '_copyin')
        sdfg.add_edge(copyin_state, start_state, ed.InterstateEdge())

        for nname, desc in set(input_nodes):
            if nname in excluded_copyin:
                continue
            src_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(
                cloned_arrays[nname], debuginfo=desc.debuginfo)
            copyin_state.add_node(src_array)
            copyin_state.add_node(dst_array)
            copyin_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(src_array.data, src_array.desc(sdfg)))

        #######################################################
        # Step 3: Create copy-out state
        excluded_copyout = self.exclude_copyout.split(',')

        copyout_state = sdfg.add_state(sdfg.label + '_copyout')
        for state in end_states:
            sdfg.add_edge(state, copyout_state, ed.InterstateEdge())

        for nname, desc in set(output_nodes):
            if nname in excluded_copyout:
                continue
            src_array = nodes.AccessNode(
                cloned_arrays[nname], debuginfo=desc.debuginfo)
            dst_array = nodes.AccessNode(nname, debuginfo=desc.debuginfo)
            copyout_state.add_node(src_array)
            copyout_state.add_node(dst_array)
            copyout_state.add_nedge(
                src_array, dst_array,
                memlet.Memlet.from_array(dst_array.data, dst_array.desc(sdfg)))

        #######################################################
        # Step 4: Modify transient data storage

        for state in sdfg.nodes():
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node,
                              nodes.AccessNode) and node.desc(sdfg).transient:
                    nodedesc = node.desc(sdfg)

                    # Special case: nodes that lead to dynamic map ranges must
                    # stay on host
                    if any(
                            isinstance(
                                state.memlet_path(e)[-1].dst, nodes.EntryNode)
                            for e in state.out_edges(node)):
                        continue

                    if sdict[node] is None:
                        # NOTE: the cloned arrays match too but it's the same
                        # storage so we don't care
                        nodedesc.storage = dtypes.StorageType.GPU_Global

                        # Try to move allocation/deallocation out of loops
                        if (self.toplevel_trans
                                and not isinstance(nodedesc, data.Stream)):
                            nodedesc.toplevel = True
                    else:
                        # Make internal transients registers
                        if self.register_trans:
                            nodedesc.storage = dtypes.StorageType.Register

        #######################################################
        # Step 5: Wrap free tasklets and nested SDFGs with a GPU map

        for state, gcodes in zip(sdfg.nodes(), global_code_nodes):
            for gcode in gcodes:
                # Create map and connectors
                me, mx = state.add_map(
                    gcode.label + '_gmap', {gcode.label + '__gmapi': '0:1'},
                    schedule=dtypes.ScheduleType.GPU_Device)
                # Store in/out edges in lists so that they don't get corrupted
                # when they are removed from the graph
                in_edges = list(state.in_edges(gcode))
                out_edges = list(state.out_edges(gcode))
                me.in_connectors = set('IN_' + e.dst_conn for e in in_edges)
                me.out_connectors = set('OUT_' + e.dst_conn for e in in_edges)
                mx.in_connectors = set('IN_' + e.src_conn for e in out_edges)
                mx.out_connectors = set('OUT_' + e.src_conn for e in out_edges)

                # Create memlets through map
                for e in in_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, me, 'IN_' + e.dst_conn,
                                   e.data)
                    state.add_edge(me, 'OUT_' + e.dst_conn, e.dst, e.dst_conn,
                                   e.data)
                for e in out_edges:
                    state.remove_edge(e)
                    state.add_edge(e.src, e.src_conn, mx, 'IN_' + e.src_conn,
                                   e.data)
                    state.add_edge(mx, 'OUT_' + e.src_conn, e.dst, e.dst_conn,
                                   e.data)

                # Map without inputs
                if len(in_edges) == 0:
                    state.add_nedge(me, gcode, memlet.EmptyMemlet())
        #######################################################
        # Step 6: Change all top-level maps to GPU maps

        for i, state in enumerate(sdfg.nodes()):
            sdict = state.scope_dict()
            for node in state.nodes():
                if isinstance(node, nodes.EntryNode):
                    if sdict[node] is None:
                        node.schedule = dtypes.ScheduleType.GPU_Device
                    elif self.sequential_innermaps:
                        node.schedule = dtypes.ScheduleType.Sequential

        #######################################################
        # Step 7: Introduce copy-out if data used in outgoing interstate edges

        for state in list(sdfg.nodes()):
            arrays_used = set()
            for e in sdfg.out_edges(state):
                # Used arrays = intersection between symbols and cloned arrays
                arrays_used.update(
                    set(e.data.condition_symbols()) & set(cloned_arrays.keys())
                )

            # Create a state and copy out used arrays
            if len(arrays_used) > 0:
                co_state = sdfg.add_state(state.label + '_icopyout')

                # Reconnect outgoing edges to after interim copyout state
                for e in sdfg.out_edges(state):
                    nxutil.change_edge_src(sdfg, state, co_state)
                # Add unconditional edge to interim state
                sdfg.add_edge(state, co_state, ed.InterstateEdge())

                # Add copy-out nodes
                for nname in arrays_used:
                    desc = sdfg.arrays[nname]
                    src_array = nodes.AccessNode(
                        cloned_arrays[nname], debuginfo=desc.debuginfo)
                    dst_array = nodes.AccessNode(
                        nname, debuginfo=desc.debuginfo)
                    co_state.add_node(src_array)
                    co_state.add_node(dst_array)
                    co_state.add_nedge(
                        src_array, dst_array,
                        memlet.Memlet.from_array(dst_array.data,
                                                 dst_array.desc(sdfg)))

        #######################################################
        # Step 8: Strict transformations
        if not self.strict_transform:
            return

        # Apply strict state fusions greedily.
        opt = optimizer.SDFGOptimizer(sdfg, inplace=True)
        fusions = 0
        arrays = 0
        options = [
            match for match in opt.get_pattern_matches(strict=True)
            if isinstance(match, (StateFusion, RedundantArray))
        ]
        while options:
            ssdfg = sdfg.sdfg_list[options[0].sdfg_id]
            options[0].apply(ssdfg)
            ssdfg.validate()
            if isinstance(options[0], StateFusion):
                fusions += 1
            if isinstance(options[0], RedundantArray):
                arrays += 1

            options = [
                match for match in opt.get_pattern_matches(strict=True)
                if isinstance(match, (StateFusion, RedundantArray))
            ]

        if Config.get_bool('debugprint') and (fusions > 0 or arrays > 0):
            print('Automatically applied {} strict state fusions and removed'
                  ' {} redundant arrays.'.format(fusions, arrays))
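
Step 8 above applies matching transformations greedily, re-searching for patterns after every application until none remain. The same driver shape, with the DaCe-specific pieces replaced by hypothetical find_matches/apply names, looks like this:

# Greedy fixpoint application of transformations (names are placeholders).
def apply_greedily(graph, find_matches):
    applied = 0
    matches = find_matches(graph)
    while matches:
        matches[0].apply(graph)   # apply the first match, then re-search
        applied += 1
        matches = find_matches(graph)
    return applied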
Example no. 13
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param output_stream: Additional output stream to write to (used for
                              DIODE client).
        :return: Path to the compiled shared library file.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Prepare performance report folder
    os.makedirs(os.path.join(program_folder, "perf"), exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    file_list = [
        line.strip().split(",")
        for line in open(os.path.join(program_folder, "dace_files.csv"), "r")
    ]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, target_type, file_name in file_list:
        if target_type:
            path = os.path.join(target_name, target_type, file_name)
        else:
            path = os.path.join(target_name, file_name)
        files.append(path)
        targets[target_name] = next(
            k for k, v in TargetCodeGenerator.extensions().items()
            if v['name'] == target_name)

    # Windows-only workaround: Override Visual C++'s linker to use
    # Multi-Threaded (MT) mode. This fixes linkage in CUDA applications where
    # CMake fails to do so.
    if os.name == 'nt':
        if '_CL_' not in os.environ:
            os.environ['_CL_'] = '/MT'
        elif '/MT' not in os.environ['_CL_']:
            os.environ['_CL_'] = os.environ['_CL_'] + ' /MT'

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_SRC_DIR=\"{}\"".format(src_folder),
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Get required environments and retrieve the CMake information
    environments = set(l.strip() for l in open(
        os.path.join(program_folder, "dace_environments.csv"), "r"))

    environments = dace.library.get_environments_and_dependencies(environments)

    environment_flags, cmake_link_flags = get_environment_flags(environments)
    cmake_command += environment_flags

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        try:
            cmake_command += target.cmake_options()
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass
        except ValueError as ex:  # Cannot find compiler executable
            raise cgx.CompilerConfigurationError(str(ex))

    cmake_command.append("-DDACE_LIBS=\"{}\"".format(" ".join(libraries)))

    # Override linker and linker arguments
    if Config.get('compiler', 'linker', 'executable'):
        cmake_command.append("-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))))
    if Config.get('compiler', 'linker', 'args') is not None:
        cmake_command.append(
            "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(
                Config.get('compiler', 'linker', 'args') + " " +
                " ".join(cmake_link_flags)), )
    cmake_command = ' '.join(cmake_command)

    cmake_filename = os.path.join(build_folder, 'cmake_configure.sh')
    ##############################################
    # Configure
    try:
        _run_liveoutput(cmake_command,
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise cgx.CompilerConfigurationError('Configuration failure')
            else:
                raise cgx.CompilerConfigurationError(
                    'Configuration failure:\n' + ex.output)

        with open(cmake_filename, "w") as fp:
            fp.write(cmake_command)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise cgx.CompilationError('Compiler failure')
        else:
            raise cgx.CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
Example no. 14
    def __init__(self, base_indentation=0):
        super(CodeIOStream, self).__init__()
        self._indent = 0
        self._spaces = int(Config.get('compiler', 'indentation_spaces'))
        self._lineinfo = Config.get_bool('compiler', 'codegen_lineinfo')
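
CodeIOStream extends an in-memory text stream with a configurable indentation width. A rough sketch of how such an indentation-aware stream might emit lines (the write logic here is illustrative, not the library's):

from io import StringIO

class IndentedStream(StringIO):
    """Illustrative indentation-aware text stream."""
    def __init__(self, spaces=4):
        super().__init__()
        self._indent = 0
        self._spaces = spaces

    def write_line(self, text):
        super().write(' ' * (self._indent * self._spaces) + text + '\n')

stream = IndentedStream(spaces=2)
stream.write_line('int main() {')
stream._indent += 1
stream.write_line('return 0;')
stream._indent -= 1
stream.write_line('}')
print(stream.getvalue())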
Example no. 15
def generate_code(sdfg) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.
        :param sdfg: The SDFG to use
        :return: List of code objects that correspond to files to compile.
    """
    # Before compiling, validate SDFG correctness
    sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        from dace.sdfg import SDFG
        import filecmp
        sdfg.save('test.sdfg')
        sdfg2 = SDFG.from_file('test.sdfg')
        sdfg2.save('test2.sdfg')
        print('Testing SDFG serialization...')
        if not filecmp.cmp('test.sdfg', 'test2.sdfg'):
            raise RuntimeError(
                'SDFG serialization failed - files do not match')
        os.remove('test.sdfg')
        os.remove('test2.sdfg')

        # Run with the deserialized version
        sdfg = sdfg2

    # Before generating the code, run type inference on the SDFG connectors
    infer_connector_types(sdfg)

    frame = framecode.DaCeCodeGenerator()

    # Instantiate CPU first (as it is used by the other code generators)
    # TODO: Refactor the parts used by other code generators out of CPU
    default_target = cpu.CPUCodeGen
    for k, v in target.TargetCodeGenerator.extensions().items():
        # If another target has already been registered as CPU, use it instead
        if v['name'] == 'cpu':
            default_target = k
    targets = {'cpu': default_target(frame, sdfg)}

    # Instantiate the rest of the targets
    targets.update({
        v['name']: k(frame, sdfg)
        for k, v in target.TargetCodeGenerator.extensions().items()
        if v['name'] not in targets
    })

    # Instantiate all instrumentation providers in SDFG
    provider_mapping = InstrumentationProvider.get_provider_mapping()
    frame._dispatcher.instrumentation[
        dtypes.InstrumentationType.No_Instrumentation] = None
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            frame._dispatcher.instrumentation[node.instrument] = \
                provider_mapping[node.instrument]
        elif hasattr(node, 'consume'):
            frame._dispatcher.instrumentation[node.consume.instrument] = \
                provider_mapping[node.consume.instrument]
        elif hasattr(node, 'map'):
            frame._dispatcher.instrumentation[node.map.instrument] = \
                provider_mapping[node.map.instrument]
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name,
                   global_code + frame_code,
                   'cpp',
                   cpu.CPUCodeGen,
                   'Frame',
                   environments=used_environments)
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # add a header file for calling the SDFG
    dummy = CodeObject(sdfg.name,
                       generate_headers(sdfg),
                       'h',
                       cpu.CPUCodeGen,
                       'CallHeader',
                       linkable=False)
    target_objects.append(dummy)

    # add a dummy main function to show how to call the SDFG
    dummy = CodeObject(sdfg.name + "_main",
                       generate_dummy(sdfg),
                       'cpp',
                       cpu.CPUCodeGen,
                       'DummyMain',
                       linkable=False)
    target_objects.append(dummy)

    return target_objects
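A hedged usage sketch for the function above: writing each returned code object to disk. The CodeObject attribute names used here (name, language, code) mirror the constructor arguments seen in this example but are otherwise assumptions about the interface.

import os

def dump_code_objects(sdfg, out_dir='generated'):
    # Assumes CodeObject exposes `name`, `language`, and `code`; verify against
    # the DaCe version in use.
    os.makedirs(out_dir, exist_ok=True)
    for cobj in generate_code(sdfg):
        path = os.path.join(out_dir, '{}.{}'.format(cobj.name, cobj.language))
        with open(path, 'w') as fp:
            fp.write(cobj.code)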
Example no. 16
def configure_and_compile(program_folder, program_name=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param program_name: Name of the program (defaults to the basename of
                             `program_folder`).
        :return: Path to the compiled shared library file.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    try:
        os.makedirs(build_folder)
    except FileExistsError:
        pass

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    file_list = [
        line.strip().split(",")
        for line in open(os.path.join(program_folder, "dace_files.csv"), "r")
    ]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, file_name in file_list:
        path = os.path.join(src_folder, target_name, file_name)
        files.append(path)
        targets[target_name] = codegen.STRING_TO_TARGET[target_name]

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        cmake_command += target.cmake_options()
        try:
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass

    # TODO: it should be possible to use the default arguments/compilers
    #       found by CMake
    cmake_command += [
        "-DDACE_LIBS=\"{}\"".format(" ".join(libraries)),
        "-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))),
        "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(
            Config.get('compiler', 'linker', 'args') +
            Config.get('compiler', 'linker', 'additional_args')),
    ]

    ##############################################
    # Configure
    try:
        _run_liveoutput(" ".join(cmake_command), shell=True, cwd=build_folder)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(
                " ".join(cmake_command), shell=True, cwd=build_folder)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise CompilerConfigurationError('Configuration failure')
            else:
                raise CompilerConfigurationError('Configuration failure:\n' +
                                                 ex.output)

    # Compile and link
    try:
        _run_liveoutput(
            "cmake --build . --config %s" % (Config.get(
                'compiler', 'build_type')),
            shell=True,
            cwd=build_folder)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise CompilationError('Compiler failure')
        else:
            raise CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder, "lib{}.{}".format(
            program_name, Config.get('compiler', 'library_extension')))

    return shared_library_path
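As a reading aid for the file-list parsing above, dace_files.csv is expected to hold one target,filename pair per line. The file contents below are hypothetical; the snippet only reproduces the structure the list comprehension produces.

# Hypothetical dace_files.csv contents (one "<target>,<file>" pair per line):
#   cpu,myprog.cpp
#   cuda,myprog_cuda.cu
example_csv = "cpu,myprog.cpp\ncuda,myprog_cuda.cu\n"
file_list = [line.strip().split(",") for line in example_csv.splitlines()]
assert file_list == [['cpu', 'myprog.cpp'], ['cuda', 'myprog_cuda.cu']]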
Example no. 17
    def apply(self, sdfg):
        first_state = sdfg.nodes()[self.subgraph[StateFusion._first_state]]
        second_state = sdfg.nodes()[self.subgraph[StateFusion._second_state]]

        # Remove interstate edge(s)
        edges = sdfg.edges_between(first_state, second_state)
        for edge in edges:
            if edge.data.assignments:
                for src, dst, other_data in sdfg.in_edges(first_state):
                    other_data.assignments.update(edge.data.assignments)
            sdfg.remove_edge(edge)

        # Special case 1: first state is empty
        if first_state.is_empty():
            nxutil.change_edge_dest(sdfg, first_state, second_state)
            sdfg.remove_node(first_state)
            return

        # Special case 2: second state is empty
        if second_state.is_empty():
            nxutil.change_edge_src(sdfg, second_state, first_state)
            nxutil.change_edge_dest(sdfg, second_state, first_state)
            sdfg.remove_node(second_state)
            return

        # Normal case: both states are not empty

        # Find source/sink (data) nodes
        first_input = [
            node for node in nxutil.find_source_nodes(first_state)
            if isinstance(node, nodes.AccessNode)
        ]
        first_output = [
            node for node in nxutil.find_sink_nodes(first_state)
            if isinstance(node, nodes.AccessNode)
        ]
        second_input = [
            node for node in nxutil.find_source_nodes(second_state)
            if isinstance(node, nodes.AccessNode)
        ]

        # first input = first input - first output
        first_input = [
            node for node in first_input
            if next((x for x in first_output
                     if x.label == node.label), None) is None
        ]

        # Merge second state to first state
        for node in second_state.nodes():
            first_state.add_node(node)
        for src, src_conn, dst, dst_conn, data in second_state.edges():
            first_state.add_edge(src, src_conn, dst, dst_conn, data)

        # Merge common (data) nodes
        for node in first_input:
            try:
                old_node = next(x for x in second_input
                                if x.label == node.label)
            except StopIteration:
                continue
            nxutil.change_edge_src(first_state, old_node, node)
            first_state.remove_node(old_node)
            second_input.remove(old_node)
        for node in first_output:
            try:
                new_node = next(x for x in second_input
                                if x.label == node.label)
            except StopIteration:
                continue
            nxutil.change_edge_dest(first_state, node, new_node)
            first_state.remove_node(node)
            second_input.remove(new_node)

        # Redirect edges and remove second state
        nxutil.change_edge_src(sdfg, second_state, first_state)
        sdfg.remove_node(second_state)
        if Config.get_bool("debugprint"):
            StateFusion._states_fused += 1
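This example reaches apply() through the old pattern-matching machinery. As a hedged sketch of how such a transformation is usually requested from user code (apply_transformations_repeated exists in recent DaCe releases, but the API may differ in the version this snippet comes from):

import dace
from dace.transformation.interstate import StateFusion

@dace.program
def example(a: dace.float64[10]):
    a += 1
    a *= 2

sdfg = example.to_sdfg()
# Repeatedly fuse fusible state pairs until no more matches are found
sdfg.apply_transformations_repeated(StateFusion)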
Example no. 18
def generate_code(sdfg, validate=True) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.
        :param sdfg: The SDFG to use
        :param validate: If True, validates the SDFG before generating the code.
        :return: List of code objects that correspond to files to compile.
    """
    from dace.codegen.targets.target import TargetCodeGenerator  # Avoid import loop

    # Before compiling, validate SDFG correctness
    if validate:
        sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        from dace.sdfg import SDFG
        import filecmp
        import shutil
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            sdfg.save(f'{tmp_dir}/test.sdfg')
            sdfg2 = SDFG.from_file(f'{tmp_dir}/test.sdfg')
            sdfg2.save(f'{tmp_dir}/test2.sdfg')
            print('Testing SDFG serialization...')
            if not filecmp.cmp(f'{tmp_dir}/test.sdfg',
                               f'{tmp_dir}/test2.sdfg'):
                shutil.move(f"{tmp_dir}/test.sdfg", "test.sdfg")
                shutil.move(f"{tmp_dir}/test2.sdfg", "test2.sdfg")
                raise RuntimeError(
                    'SDFG serialization failed - files do not match')

        # Run with the deserialized version
        # NOTE: This means that all subsequent modifications to `sdfg`
        # are not reflected outside of this function (e.g., library
        # node expansion).
        sdfg = sdfg2

    # Before generating the code, run type inference on the SDFG connectors
    infer_types.infer_connector_types(sdfg)

    # Set default storage/schedule types in SDFG
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    # Recursively expand library nodes that have not yet been expanded
    sdfg.expand_library_nodes()

    # After expansion, run another pass of connector/type inference
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    frame = framecode.DaCeCodeGenerator(sdfg)

    # Instantiate CPU first (as it is used by the other code generators)
    # TODO: Refactor the parts used by other code generators out of CPU
    default_target = cpu.CPUCodeGen
    for k, v in TargetCodeGenerator.extensions().items():
        # If another target has already been registered as CPU, use it instead
        if v['name'] == 'cpu':
            default_target = k
    targets = {'cpu': default_target(frame, sdfg)}

    # Instantiate the rest of the targets
    targets.update({
        v['name']: k(frame, sdfg)
        for k, v in TargetCodeGenerator.extensions().items()
        if v['name'] not in targets
    })

    # Query all code generation targets and instrumentation providers in SDFG
    _get_codegen_targets(sdfg, frame)

    # Preprocess SDFG
    for target in frame.targets:
        target.preprocess(sdfg)

    # Instantiate instrumentation providers
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # NOTE: THE SDFG IS ASSUMED TO BE FROZEN (i.e., it must not change) FROM THIS POINT ONWARDS

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name,
                   global_code + frame_code,
                   'cpp',
                   cpu.CPUCodeGen,
                   'Frame',
                   environments=used_environments,
                   sdfg=sdfg)
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # Ensure that no new targets were dynamically added
    assert frame._dispatcher.used_targets == (frame.targets - {frame})

    # add a header file for calling the SDFG
    dummy = CodeObject(sdfg.name,
                       generate_headers(sdfg, frame),
                       'h',
                       cpu.CPUCodeGen,
                       'CallHeader',
                       target_type='../../include',
                       linkable=False)
    target_objects.append(dummy)

    for env in dace.library.get_environments_and_dependencies(
            used_environments):
        if hasattr(env, "codeobjects"):
            target_objects.extend(env.codeobjects)

    # add a dummy main function to show how to call the SDFG
    dummy = CodeObject(sdfg.name + "_main",
                       generate_dummy(sdfg, frame),
                       'cpp',
                       cpu.CPUCodeGen,
                       'SampleMain',
                       target_type='../../sample',
                       linkable=False)
    target_objects.append(dummy)

    return target_objects
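The serialization round-trip above only runs when the corresponding configuration entry is enabled. A short sketch of turning it on follows; Config.set and the DACE_* environment-variable convention exist in DaCe, though exact behaviour may vary between versions.

from dace.config import Config

# Enable the save/reload consistency check performed in generate_code above
Config.set('testing', 'serialization', value=True)
# Equivalent environment variable, set before the process starts:
#   DACE_testing_serialization=1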
Example no. 19
    def optimize(self):
        """ A command-line UI for applying patterns on the SDFG.
            :return: An optimized SDFG object
        """
        sdfg_file = self.sdfg.name + '.sdfg'
        if os.path.isfile(sdfg_file):
            ui_input = input('An SDFG with the filename "%s" was found. '
                             'Would you like to use it instead? [Y/n] ' %
                             sdfg_file)
            if len(ui_input) == 0 or ui_input[0] not in ['n', 'N']:
                return dace.SDFG.from_file(sdfg_file)

        # Visualize SDFGs during optimization process
        VISUALIZE_SDFV = Config.get_bool('optimizer', 'visualize_sdfv')
        SAVE_INTERMEDIATE = Config.get_bool('optimizer', 'save_intermediate')

        if SAVE_INTERMEDIATE:
            self.sdfg.save(os.path.join('_dacegraphs', 'before.sdfg'))
            if VISUALIZE_SDFV:
                from diode import sdfv
                sdfv.view(os.path.join('_dacegraphs', 'before.sdfg'))

        # Optimize until no more pattern matches are found or the user stops the process.
        pattern_counter = 0
        while True:
            # Print all pattern match options in the UI.
            ui_options = sorted(self.get_pattern_matches())
            ui_options_idx = 0
            for pattern_match in ui_options:
                sdfg = self.sdfg.sdfg_list[pattern_match.sdfg_id]
                print('%d. Transformation %s' %
                      (ui_options_idx, pattern_match.print_match(sdfg)))
                ui_options_idx += 1

            # If no pattern matches were found, quit.
            if ui_options_idx == 0:
                print('No viable transformations found')
                break

            ui_input = input(
                'Select the pattern to apply (0 - %d or name$id): ' %
                (ui_options_idx - 1))

            pattern_name, occurrence, param_dict = _parse_cli_input(ui_input)

            pattern_match = None
            if (pattern_name is None and occurrence >= 0
                    and occurrence < ui_options_idx):
                pattern_match = ui_options[occurrence]
            elif pattern_name is not None:
                counter = 0
                for match in ui_options:
                    if type(match).__name__ == pattern_name:
                        if occurrence == counter:
                            pattern_match = match
                            break
                        counter = counter + 1

            if pattern_match is None:
                print(
                    'You did not select a valid option. Quitting optimization ...'
                )
                break

            match_id = (str(occurrence) if pattern_name is None else '%s$%d' %
                        (pattern_name, occurrence))
            sdfg = self.sdfg.sdfg_list[pattern_match.sdfg_id]
            print('You selected (%s) pattern %s with parameters %s' %
                  (match_id, pattern_match.print_match(sdfg), str(param_dict)))

            # Set each parameter of the parameter dictionary separately
            for k, v in param_dict.items():
                setattr(pattern_match, k, v)

            pattern_match.apply(sdfg)
            self.applied_patterns.add(type(pattern_match))

            if SAVE_INTERMEDIATE:
                filename = 'after_%d_%s_b4lprop' % (
                    pattern_counter + 1, type(pattern_match).__name__)
                self.sdfg.save(os.path.join('_dacegraphs', filename + '.sdfg'))

            if not pattern_match.annotates_memlets():
                propagation.propagate_memlets_sdfg(self.sdfg)

            pattern_counter += 1
            if SAVE_INTERMEDIATE:
                filename = 'after_%d_%s' % (pattern_counter,
                                            type(pattern_match).__name__)
                self.sdfg.save(os.path.join('_dacegraphs', filename + '.sdfg'))

                if VISUALIZE_SDFV:
                    from diode import sdfv
                    sdfv.view(os.path.join('_dacegraphs', filename + '.sdfg'))

        return self.sdfg
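The helper _parse_cli_input used above is not part of this example. The following is a hypothetical reimplementation, inferred only from how its three return values (pattern_name, occurrence, param_dict) are consumed above; it is not DaCe's actual parser.

import ast

def _parse_cli_input(ui_input):
    # Hypothetical parser for inputs of the form "3", "name$2", or
    # "name$2(prop=value, ...)"; inferred from the usage above.
    pattern_name, occurrence, param_dict = None, -1, {}
    if '(' in ui_input:
        ui_input, _, params = ui_input.partition('(')
        for assignment in params.rstrip(')').split(','):
            if '=' not in assignment:
                continue
            key, _, value = assignment.partition('=')
            param_dict[key.strip()] = ast.literal_eval(value.strip())
    if '$' in ui_input:
        pattern_name, _, occ = ui_input.partition('$')
        occurrence = int(occ)
    else:
        try:
            occurrence = int(ui_input)
        except ValueError:
            pass
    return pattern_name, occurrence, param_dict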
Example no. 20
    def apply(self, sdfg):
        graph = sdfg.nodes()[self.state_id]
        if self.expr_index == 0:
            cnode = graph.nodes()[self.subgraph[GPUTransformMap._map_entry]]
            node_schedprop = cnode.map
            exit_nodes = graph.exit_nodes(cnode)
        else:
            cnode = graph.nodes()[self.subgraph[GPUTransformMap._reduce]]
            node_schedprop = cnode
            exit_nodes = [cnode]

        # Change schedule
        node_schedprop._schedule = dtypes.ScheduleType.GPU_Device
        if Config.get_bool("debugprint"):
            GPUTransformMap._maps_transformed += 1

        gpu_storage_types = [
            dtypes.StorageType.GPU_Global,
            dtypes.StorageType.GPU_Shared,
            dtypes.StorageType.GPU_Stack  #, dtypes.StorageType.CPU_Pinned
        ]

        #######################################################
        # Add GPU copies of CPU arrays (i.e., not already on GPU)

        # First, understand which arrays to clone
        all_out_edges = []
        for enode in exit_nodes:
            all_out_edges.extend(list(graph.out_edges(enode)))
        in_arrays_to_clone = set()
        out_arrays_to_clone = set()
        out_streamarrays = {}
        for e in graph.in_edges(cnode):
            data_node = sd.find_input_arraynode(graph, e)
            if isinstance(data_node.desc(sdfg), data.Scalar):
                continue
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                in_arrays_to_clone.add(data_node)
        for e in all_out_edges:
            data_node = sd.find_output_arraynode(graph, e)
            if isinstance(data_node.desc(sdfg), data.Scalar):
                continue
            if data_node.desc(sdfg).storage not in gpu_storage_types:
                # Stream directly connected to an array
                if sd.is_array_stream_view(sdfg, graph, data_node):
                    datadesc = data_node.desc(sdfg)
                    if datadesc.transient is False:
                        raise TypeError('Non-transient stream-array views are '
                                        'unsupported')
                    # Add parent node to clone
                    out_arrays_to_clone.add(graph.out_edges(data_node)[0].dst)
                    out_streamarrays[graph.out_edges(data_node)
                                     [0].dst] = data_node

                    # Do not clone stream
                    continue

                out_arrays_to_clone.add(data_node)
        if Config.get_bool("debugprint"):
            GPUTransformMap._arrays_removed += len(in_arrays_to_clone) + len(
                out_arrays_to_clone)

        # Second, create a GPU clone of each array
        cloned_arrays = {}
        in_cloned_arraynodes = {}
        out_cloned_arraynodes = {}
        for array_node in in_arrays_to_clone:
            array = array_node.desc(sdfg)
            if array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                cloned_array = array.clone()
                cloned_array.storage = dtypes.StorageType.GPU_Global
                cloned_array.transient = True
                sdfg.add_datadesc('gpu_' + array_node.data, cloned_array)
                cloned_arrays[array_node.data] = 'gpu_' + array_node.data
            cloned_node = type(array_node)('gpu_' + array_node.data)

            in_cloned_arraynodes[array_node.data] = cloned_node
        for array_node in out_arrays_to_clone:
            array = array_node.desc(sdfg)
            if array_node.data in cloned_arrays:
                cloned_array = cloned_arrays[array_node.data]
            else:
                cloned_array = array.clone()
                cloned_array.storage = dtypes.StorageType.GPU_Global
                cloned_array.transient = True
                sdfg.add_datadesc('gpu_' + array_node.data, cloned_array)
                cloned_arrays[array_node.data] = 'gpu_' + array_node.data
            cloned_node = type(array_node)('gpu_' + array_node.data)

            out_cloned_arraynodes[array_node.data] = cloned_node

        # Third, connect the cloned arrays to the originals
        # TODO(later): Shift indices and create only the necessary sub-arrays
        for array_name, node in in_cloned_arraynodes.items():
            graph.add_node(node)
            for edge in graph.in_edges(cnode):
                if edge.data.data == array_name:
                    graph.remove_edge(edge)
                    newmemlet = copy.copy(edge.data)
                    newmemlet.data = node.data
                    graph.add_edge(node, edge.src_conn, edge.dst,
                                   edge.dst_conn, newmemlet)

                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = edge.data.subset
                    graph.add_edge(edge.src, None, node, None, edge.data)
        for array_name, node in out_cloned_arraynodes.items():
            graph.add_node(node)
            for edge in all_out_edges:
                if edge.data.data == array_name:
                    graph.remove_edge(edge)
                    newmemlet = copy.copy(edge.data)
                    newmemlet.data = node.data
                    graph.add_edge(edge.src, edge.src_conn, node,
                                   edge.dst_conn, newmemlet)
                    edge.data.wcr = None
                    if self.fullcopy:
                        edge.data.subset = sbs.Range.from_array(
                            node.desc(sdfg))
                    edge.data.other_subset = edge.data.subset
                    graph.add_edge(node, None, edge.dst, None, edge.data)

        # Reconnect stream-arrays
        for array_node, streamnode in out_streamarrays.items():
            # Set stream storage to GPU
            streamnode.desc(sdfg).storage = dtypes.StorageType.GPU_Global

            cloned_node = out_cloned_arraynodes[array_node.data]

            e = graph.out_edges(streamnode)[0]
            graph.remove_edge(e)
            newmemlet = copy.copy(e.data)
            newmemlet.data = cloned_node.data
            # stream -> cloned array
            graph.add_edge(e.src, e.src_conn, cloned_node, e.dst_conn,
                           newmemlet)
            # cloned array -> array
            graph.add_nedge(cloned_node, array_node, e.data)

        # Fourth, replace memlet arrays as necessary
        if self.expr_index == 0:
            scope_subgraph = graph.scope_subgraph(cnode)
            for edge in scope_subgraph.edges():
                if (edge.data.data is not None
                        and edge.data.data in cloned_arrays):
                    edge.data.data = cloned_arrays[edge.data.data]