예제 #1
0
def _set_default_schedule_in_scope(state: SDFGState,
                                   parent_node: nodes.Node,
                                   parent_schedule: dtypes.ScheduleType,
                                   reverse_scope_dict: Dict[nodes.Node,
                                                            List[nodes.Node]],
                                   use_parent_schedule: bool = False):
    for node in reverse_scope_dict[parent_node]:
        if use_parent_schedule:
            child_schedule = parent_schedule
            if parent_schedule in (dtypes.ScheduleType.Default,
                                   dtypes.ScheduleType.GPU_Default,
                                   dtypes.ScheduleType.GPU_Multidevice):
                child_schedule = dtypes.SCOPEDEFAULT_SCHEDULE[parent_schedule]
        else:
            child_schedule = dtypes.SCOPEDEFAULT_SCHEDULE[parent_schedule]
        # Set default schedule type
        if isinstance(node, nodes.MapEntry):
            if (parent_schedule is dtypes.ScheduleType.GPU_Device
                    and has_dynamic_map_inputs(state, node)):
                node.map.schedule = dtypes.ScheduleType.GPU_ThreadBlock_Dynamic
            if node.map.schedule is dtypes.ScheduleType.Default:
                node.map.schedule = child_schedule
            # Also traverse children (recursively)
            _set_default_schedule_in_scope(state, node, node.map.schedule,
                                           reverse_scope_dict)
        elif isinstance(node, nodes.ConsumeEntry):
            if node.consume.schedule is dtypes.ScheduleType.Default:
                node.consume.schedule = child_schedule

            # Also traverse children (recursively)
            _set_default_schedule_in_scope(state, node, node.consume.schedule,
                                           reverse_scope_dict)
        elif isinstance(node, nodes.NestedSDFG):
            # Nested SDFGs retain same schedule as their parent scope
            if node.schedule is dtypes.ScheduleType.Default:
                node.schedule = parent_schedule
            if node.schedule is dtypes.ScheduleType.GPU_Multidevice:
                node.sdfg.openmp_sections = False
            _set_default_schedule_types(node.sdfg, node.schedule)
        elif getattr(node, 'schedule', False):
            if node.schedule is dtypes.ScheduleType.Default:
                node.schedule = (
                    child_schedule if isinstance(node, nodes.EntryNode)
                    or parent_schedule is None else parent_schedule)
예제 #2
0
    def can_be_applied(graph: SDFGState,
                       candidate: Dict[xf.PatternNode, int],
                       expr_index: int,
                       sdfg: SDFG,
                       permissive: bool = False) -> bool:
        access = graph.node(candidate[StreamingMemory.access])
        # Make sure the access node is only accessed once (read or write),
        # and not at the same time
        if graph.out_degree(access) > 0 and graph.in_degree(access) > 0:
            return False

        # If already a stream, skip
        if isinstance(sdfg.arrays[access.data], data.Stream):
            return False
        # If does not exist on off-chip memory, skip
        if sdfg.arrays[access.data].storage not in [
                dtypes.StorageType.CPU_Heap, dtypes.StorageType.CPU_Pinned,
                dtypes.StorageType.GPU_Global, dtypes.StorageType.FPGA_Global
        ]:
            return False

        # Only free nodes are allowed (search up the SDFG tree)
        curstate = graph
        node = access
        while curstate is not None:
            if curstate.entry_node(node) is not None:
                return False
            if curstate.parent.parent_nsdfg_node is None:
                break
            node = curstate.parent.parent_nsdfg_node
            curstate = curstate.parent.parent

        # Only one memlet path is allowed per outgoing/incoming edge
        edges = (graph.out_edges(access)
                 if expr_index == 0 else graph.in_edges(access))
        for edge in edges:
            mpath = graph.memlet_path(edge)
            if len(mpath) != len(list(graph.memlet_tree(edge))):
                return False

            # The innermost end of the path must have a clearly defined memory
            # access pattern
            innermost_edge = mpath[-1] if expr_index == 0 else mpath[0]
            if (innermost_edge.data.subset.num_elements() != 1
                    or innermost_edge.data.dynamic
                    or innermost_edge.data.volume != 1):
                return False

            # Check if any of the maps has a dynamic range
            # These cases can potentially work but some nodes (and perhaps
            # tasklets) need to be replicated, which are difficult to track.
            for pe in mpath:
                node = pe.dst if expr_index == 0 else graph.entry_node(pe.src)
                if isinstance(
                        node,
                        nodes.MapEntry) and sdutil.has_dynamic_map_inputs(
                            graph, node):
                    return False

        # If already applied on this memlet and this is the I/O component, skip
        if expr_index == 0:
            other_node = graph.node(candidate[StreamingMemory.entry])
        else:
            other_node = graph.node(candidate[StreamingMemory.exit])
            other_node = graph.entry_node(other_node)
        if other_node.label.startswith('__s'):
            return False

        return True
예제 #3
0
    def can_be_applied(self,
                       graph: SDFGState,
                       expr_index: int,
                       sdfg: SDFG,
                       permissive: bool = False) -> bool:
        access = self.access
        # Make sure the access node is only accessed once (read or write),
        # and not at the same time
        if graph.out_degree(access) > 0 and graph.in_degree(access) > 0:
            return False

        # If already a stream, skip
        if isinstance(sdfg.arrays[access.data], data.Stream):
            return False
        # If does not exist on off-chip memory, skip
        if sdfg.arrays[access.data].storage not in [
                dtypes.StorageType.CPU_Heap, dtypes.StorageType.CPU_Pinned,
                dtypes.StorageType.GPU_Global, dtypes.StorageType.FPGA_Global
        ]:
            return False

        # Only free nodes are allowed (search up the SDFG tree)
        curstate = graph
        node = access
        while curstate is not None:
            if curstate.entry_node(node) is not None:
                return False
            if curstate.parent.parent_nsdfg_node is None:
                break
            node = curstate.parent.parent_nsdfg_node
            curstate = curstate.parent.parent

        # Only one memlet path is allowed per outgoing/incoming edge
        edges = (graph.out_edges(access)
                 if expr_index == 0 else graph.in_edges(access))
        for edge in edges:
            mpath = graph.memlet_path(edge)
            if len(mpath) != len(list(graph.memlet_tree(edge))):
                return False

            # The innermost end of the path must have a clearly defined memory
            # access pattern
            innermost_edge = mpath[-1] if expr_index == 0 else mpath[0]
            if (innermost_edge.data.subset.num_elements() != 1
                    or innermost_edge.data.dynamic
                    or innermost_edge.data.volume != 1):
                return False

            # Check if any of the maps has a dynamic range
            # These cases can potentially work but some nodes (and perhaps
            # tasklets) need to be replicated, which are difficult to track.
            for pe in mpath:
                node = pe.dst if expr_index == 0 else graph.entry_node(pe.src)
                if isinstance(
                        node,
                        nodes.MapEntry) and sdutil.has_dynamic_map_inputs(
                            graph, node):
                    return False

        # If already applied on this memlet and this is the I/O component, skip
        if expr_index == 0:
            other_node = self.entry
        else:
            other_node = self.exit
            other_node = graph.entry_node(other_node)
        if other_node.label.startswith('__s'):
            return False

        ## Check Memory Buffering Properties
        if self.use_memory_buffering:

            access = self.access
            desc = sdfg.arrays[access.data]

            # Array has to be global array
            if desc.storage != dtypes.StorageType.FPGA_Global:
                return False

            # Type has to divide target bytes
            if self.memory_buffering_target_bytes % desc.dtype.bytes != 0:
                return False

            # Target bytes has to be >= size of data type
            if self.memory_buffering_target_bytes < desc.dtype.bytes:
                return False

            strides = list(desc.strides)

            # Last stride has to be one
            if strides[-1] != 1:
                return False

            vector_size = int(self.memory_buffering_target_bytes /
                              desc.dtype.bytes)
            strides.pop()  # Remove last element since we already checked it

            # Other strides have to be divisible by vector size
            for stride in strides:

                if is_int(stride) and stride % vector_size != 0:
                    return False

            # Check if map has the right access pattern
            # Stride 1 access by innermost loop, innermost loop counter has to be divisible by vector size
            # Same code as in apply
            state = sdfg.node(self.state_id)
            dnode: nodes.AccessNode = self.access
            if self.expr_index == 0:
                edges = state.out_edges(dnode)
            else:
                edges = state.in_edges(dnode)

            mapping: Dict[
                Tuple[subsets.Range],
                List[gr.MultiConnectorEdge[mm.Memlet]]] = defaultdict(list)
            ranges = {}
            for edge in edges:
                mpath = state.memlet_path(edge)
                ranges[edge] = _collect_map_ranges(state, mpath)
                mapping[tuple(r[1] for r in ranges[edge])].append(edge)

            for edges_with_same_range in mapping.values():
                for edge in edges_with_same_range:
                    # Get memlet path and innermost edge
                    mpath = state.memlet_path(edge)
                    innermost_edge = copy.deepcopy(
                        mpath[-1] if self.expr_index == 0 else mpath[0])

                    edge_subset = [
                        a_tuple[0]
                        for a_tuple in list(innermost_edge.data.subset)
                    ]

                    if self.expr_index == 0:
                        map_subset = innermost_edge.src.map.params.copy()
                        ranges = list(innermost_edge.src.map.range)
                    else:
                        map_subset = innermost_edge.dst.map.params.copy()
                        ranges = list(innermost_edge.dst.map.range)

                    # Check is correct access pattern
                    # Correct ranges in map
                    if is_int(ranges[-1]
                              [1]) and (ranges[-1][1] + 1) % vector_size != 0:
                        return False

                    if ranges[-1][2] != 1:
                        return False

                    # Correct access in array
                    if isinstance(edge_subset[-1], symbol) and str(
                            edge_subset[-1]) == map_subset[-1]:
                        pass

                    elif isinstance(edge_subset[-1], sympy.core.add.Add):

                        counter: int = 0

                        for arg in edge_subset[-1].args:
                            if isinstance(
                                    arg,
                                    symbol) and str(arg) == map_subset[-1]:
                                counter += 1

                        if counter != 1:
                            return False

                    else:
                        return False

        return True