Esempio n. 1
0
    def pre_evaluate(self, cutout: dace.SDFG, measurements: int,
                     **kwargs) -> Dict:
        """
        Prepare the evaluation arguments for a single cutout.

        Sets this tuner's instrumentation type on the cutout's start state,
        finds the first map entry in that state that has no parent map, and
        packs everything the evaluation needs into a dictionary.

        :param cutout: SDFG cutout to evaluate; its start state is
                       instrumented in place.
        :param measurements: Number of measurements, passed through
                             unchanged under the ``"measurements"`` key.
        :param kwargs: Accepted but not used here.
        :return: Dictionary with the keys ``"space_kwargs"``, ``"cutout"``
                 (JSON form of the cutout), ``"map_entry_id"``,
                 ``"measurements"`` and ``"key"`` (a callable mapping a
                 search-space point to a string).
        """
        start = cutout.start_state
        start.instrument = self.instrument

        # First map entry of the start state that is not nested in another map.
        map_entry = next(
            (candidate for candidate in start.nodes()
             if isinstance(candidate, dace.nodes.MapEntry)
             and xfh.get_parent_map(start, candidate) is None),
            None,
        )
        assert map_entry is not None

        return {
            "space_kwargs": {
                "map_entry": map_entry
            },
            "cutout": cutout.to_json(),
            "map_entry_id": start.node_id(map_entry),
            "measurements": measurements,
            # A missing point is keyed as the literal string "None"; otherwise
            # the point's components are joined with dots.
            "key": lambda point: ("None" if point is None else ".".join(
                map(str, point))),
        }
Esempio n. 2
0
    def cutouts(self) -> Generator[Tuple[dace.SDFG, str], None, None]:
        """
        Yield one cutout per map entry that has no parent map.

        Walks all nodes returned by ``all_nodes_recursive`` on the tuner's
        SDFG. For every qualifying map entry, the nodes of its scope subgraph
        are cut out of the containing state.

        :return: Generator of ``(cutout, label)`` pairs, where ``label`` is
                 ``"<state id>.<node id>.<map label>"``.
        """
        for node, state in self._sdfg.all_nodes_recursive():
            if not isinstance(node, dace.nodes.MapEntry):
                continue
            # Maps nested inside another map are covered by their parent.
            if xfh.get_parent_map(state, node) is not None:
                continue

            node_id = state.node_id(node)
            # NOTE(review): the state id is resolved on the top-level SDFG;
            # presumably states of nested SDFGs can reach this line too --
            # confirm that node_id handles them.
            state_id = self._sdfg.node_id(state)
            scope_nodes = state.scope_subgraph(node).nodes()
            label = f"{state_id}.{node_id}.{node.label}"
            yield cutter.cutout_state(state, *scope_nodes), label
Esempio n. 3
0
def fpga_update(sdfg, state, depth):
    """
    Move default storage and schedules of a state to their FPGA variants.

    For each access node whose descriptor uses ``StorageType.Default``:

    - the storage becomes ``FPGA_Local`` if ``depth`` is at least 2, if the
      node's parent map is scheduled as ``FPGA_Device``, or if the node has
      an enclosing scope in the state's scope dictionary;
    - otherwise the storage becomes ``FPGA_Global``.

    Every node with a ``schedule`` attribute equal to ``ScheduleType.Default``
    is rescheduled to ``FPGA_Device``. Nested SDFGs are processed
    recursively, state by state, with ``depth + 1``.

    :param sdfg: SDFG that owns ``state``; used to look up data descriptors.
    :param state: State whose nodes are updated in place.
    :param depth: Current recursion depth through nested SDFGs.
    """
    enclosing_scope = state.scope_dict()
    for node in state.nodes():
        if isinstance(node, nodes.AccessNode):
            desc = node.desc(sdfg)
            if desc.storage == dtypes.StorageType.Default:
                parent = xfh.get_parent_map(state, node)
                # Same short-circuit order as a nested if/else: the scope
                # dictionary is consulted only when the first two tests fail.
                use_local = (depth >= 2
                             or (parent is not None and parent[0].schedule
                                 == dtypes.ScheduleType.FPGA_Device)
                             or enclosing_scope[node])
                if use_local:
                    desc.storage = dtypes.StorageType.FPGA_Local
                else:
                    desc.storage = dtypes.StorageType.FPGA_Global

        has_default_schedule = (hasattr(node, "schedule") and node.schedule
                                == dace.dtypes.ScheduleType.Default)
        if has_default_schedule:
            node.schedule = dace.dtypes.ScheduleType.FPGA_Device

        if isinstance(node, nodes.NestedSDFG):
            for inner_state in node.sdfg.nodes():
                fpga_update(node.sdfg, inner_state, depth + 1)
    def transfer(sdfg: dace.SDFG, tuner, k: int = 5):
        """
        Greedily apply the tuner's best on-the-fly map-fusion patterns.

        The tuner is run without applying its result, and the top ``k``
        configurations of its report are converted into patterns. Each
        pattern maps a map descriptor to the number of maps with that
        descriptor.

        For every state of every (nested) SDFG with at least two top-level
        maps, the following is repeated until no pattern improves on the
        unfused baseline:

        1. Each pattern contained in the state is fused on a deep copy of a
           cutout of the state, and the copy is measured.
        2. The fastest pattern that beats the baseline is fused into the
           real state.

        :param sdfg: SDFG to transform in place. It must provide instrumented
                     data (``get_instrumented_data`` must not return None).
        :param tuner: An ``OnTheFlyMapFusionTuner`` instance.
        :param k: Number of top configurations used to build the patterns.
        """
        assert isinstance(tuner, OnTheFlyMapFusionTuner)

        dreport = sdfg.get_instrumented_data()
        assert dreport is not None

        def _top_level_maps(graph):
            """Return the map entries of ``graph`` that have no parent map."""
            return [
                node for node in graph.nodes()
                if isinstance(node, dace.nodes.MapEntry)
                and xfh.get_parent_map(graph, node) is None
            ]

        def _recorded_inputs(candidate):
            """Collect recorded data for the non-transient arrays of ``candidate``."""
            inputs = {}
            for cstate in candidate.nodes():
                for dnode in cstate.data_nodes():
                    if candidate.arrays[dnode.data].transient:
                        continue
                    try:
                        inputs[dnode.data] = dreport.get_first_version(
                            dnode.data)
                    except Exception:
                        # Best effort: containers without usable recorded
                        # data are left out of the measurement inputs.
                        continue
            return inputs

        tuning_report = tuner.optimize(apply=False)
        best_configs = cutout_tuner.CutoutTuner.top_k_configs(tuning_report,
                                                              k=k)
        subgraph_patterns = tuner._extract_patterns(best_configs)

        for nsdfg in sdfg.all_sdfgs_recursive():
            for state in nsdfg.states():
                if len(_top_level_maps(state)) < 2:
                    continue

                # Probe only: states that cannot be cut out are skipped.
                try:
                    cutter.cutout_state(state,
                                        *(state.nodes()),
                                        make_copy=False)
                except AttributeError:
                    continue

                while True:
                    base_runtime = None
                    best_pattern = None
                    best_pattern_runtime = math.inf
                    for pattern in subgraph_patterns:
                        maps = _top_level_maps(state)
                        if len(maps) < 2:
                            continue

                        # Group the state's top-level maps by descriptor and
                        # count how often each descriptor occurs.
                        maps_desc = {}
                        state_desc = Counter()
                        for map_entry in maps:
                            map_desc = OnTheFlyMapFusionTuner.map_descriptor(
                                state, map_entry)
                            state_desc[map_desc] += 1
                            maps_desc.setdefault(map_desc,
                                                 []).append(map_entry)

                        # The pattern applies only if the state has at least
                        # as many maps of every descriptor as it requires.
                        if any(key not in state_desc
                               or pattern[key] > state_desc[key]
                               for key in pattern):
                            continue

                        if base_runtime is None:
                            # Measure the unfused state once per round, and
                            # only when some pattern is applicable.
                            baseline = cutter.cutout_state(state,
                                                           *(state.nodes()),
                                                           make_copy=False)
                            baseline.start_state.instrument = dace.InstrumentationType.GPU_Events

                            # NOTE(review): i=192, j=192 are hard-coded;
                            # presumably symbol values for the measurement
                            # run -- confirm.
                            base_runtime = optim_utils.subprocess_measure(
                                baseline,
                                _recorded_inputs(baseline),
                                i=192,
                                j=192)
                            best_pattern_runtime = base_runtime
                            if base_runtime == math.inf:
                                # No usable baseline: give up on this state.
                                break

                        # Construct subgraph greedily
                        subgraph_maps = []
                        for desc in pattern:
                            num = pattern[desc]
                            subgraph_maps.extend(maps_desc[desc][:num])

                        # Apply the fusion on a deep copy. Node ids are taken
                        # from the uncopied cutout so the chosen maps can be
                        # found again in the copy.
                        experiment_sdfg_ = cutter.cutout_state(
                            state, *(state.nodes()), make_copy=False)
                        experiment_state_ = experiment_sdfg_.start_state
                        experiment_maps_ids = [
                            experiment_state_.node_id(me)
                            for me in subgraph_maps
                        ]
                        experiment_sdfg = copy.deepcopy(experiment_sdfg_)
                        experiment_state = experiment_sdfg.start_state
                        experiment_state.instrument = dace.InstrumentationType.GPU_Events

                        experiment_maps = [
                            experiment_state.node(m_id)
                            for m_id in experiment_maps_ids
                        ]
                        experiment_subgraph = helpers.subgraph_from_maps(
                            sdfg=experiment_sdfg,
                            graph=experiment_state,
                            map_entries=experiment_maps)

                        map_fusion = sg.SubgraphOTFFusion()
                        map_fusion.setup_match(
                            experiment_subgraph, experiment_sdfg.sdfg_id,
                            experiment_sdfg.node_id(experiment_state))
                        if not map_fusion.can_be_applied(
                                experiment_state, experiment_sdfg):
                            continue

                        try:
                            experiment_fuse_counter = map_fusion.apply(
                                experiment_state, experiment_sdfg)
                        except Exception:
                            # A failed trial fusion only disqualifies this
                            # pattern; the real state is untouched.
                            continue

                        if experiment_fuse_counter == 0:
                            continue

                        fused_runtime = optim_utils.subprocess_measure(
                            experiment_sdfg,
                            _recorded_inputs(experiment_sdfg),
                            i=192,
                            j=192)
                        if fused_runtime >= best_pattern_runtime:
                            continue

                        best_pattern = subgraph_maps
                        best_pattern_runtime = fused_runtime

                    if best_pattern is None:
                        break

                    # Fuse the winning pattern into the real state, then run
                    # another round on the modified state.
                    subgraph = helpers.subgraph_from_maps(
                        sdfg=nsdfg, graph=state, map_entries=best_pattern)
                    map_fusion = sg.SubgraphOTFFusion()
                    map_fusion.setup_match(subgraph, nsdfg.sdfg_id,
                                           nsdfg.node_id(state))
                    map_fusion.apply(state, nsdfg)
Esempio n. 5
0
def make_transients_persistent(sdfg: SDFG,
                               device: dtypes.DeviceType,
                               toplevel_only: bool = True) -> None:
    '''
    Helper function to change several storage and scheduling properties:

    - Makes the lifetime of eligible transient arrays and scalars persistent.
    - Resets nonatomic WCR edges when the device is a GPU.

    A data container is made persistent only if every access node referring
    to it within its SDFG passes all of these checks:

    - its descriptor is transient and is exactly an ``Array`` or ``Scalar``;
    - it is not stored in registers;
    - every symbol in its total size is a free symbol of the SDFG;
    - it is not inside a map, or, when ``toplevel_only`` is False, it is
      inside a map but its lifetime is not ``Scope``.

    :param sdfg: SDFG to modify in place (including nested SDFGs).
    :param device: Device type
    :param toplevel_only: If True, only converts access nodes that do not appear in any scope.
    '''

    def _is_excluded(state, dnode, desc, known_symbols) -> bool:
        # Only convert transient arrays and scalars (exact types only)
        if not desc.transient or type(desc) not in {dt.Array, dt.Scalar}:
            return True
        # Registers are never made persistent
        if desc.storage == dtypes.StorageType.Register:
            return True
        # Only convert arrays where the size depends on SDFG parameters
        try:
            if set(map(str, desc.total_size.free_symbols)) - known_symbols:
                return True
        except AttributeError:  # total_size is an integer / has no free symbols
            pass
        # Access nodes inside a map are excluded, unless nested nodes are
        # allowed and the lifetime is something other than Scope
        if xfh.get_parent_map(state, dnode) is not None:
            if toplevel_only:
                return True
            if desc.lifetime == dtypes.AllocationLifetime.Scope:
                return True
        return False

    for nsdfg in sdfg.all_sdfgs_recursive():
        fsyms: Set[str] = nsdfg.free_symbols
        accepted: Set[str] = set()
        rejected: Set[str] = set()

        for state in nsdfg.nodes():
            for dnode in state.data_nodes():
                name = dnode.data
                # One failing access node disqualifies the whole container
                if name in rejected:
                    continue
                if _is_excluded(state, dnode, dnode.desc(nsdfg), fsyms):
                    rejected.add(name)
                else:
                    accepted.add(name)

        # A container accepted early may have been rejected by a later node
        for name in (accepted - rejected):
            nsdfg.arrays[name].lifetime = dtypes.AllocationLifetime.Persistent

    if device != dtypes.DeviceType.GPU:
        return

    # Reset nonatomic WCR edges
    for graph, _ in sdfg.all_nodes_recursive():
        if not isinstance(graph, SDFGState):
            continue
        for edge in graph.edges():
            edge.data.wcr_nonatomic = False