def _set_default_schedule_in_scope(state: SDFGState,
                                   parent_node: nodes.Node,
                                   parent_schedule: dtypes.ScheduleType,
                                   reverse_scope_dict: Dict[nodes.Node, List[nodes.Node]],
                                   use_parent_schedule: bool = False):
    """
    Replace ``Default`` schedules on every node listed under ``parent_node``.

    Map and consume entries are handled first and then recursed into with
    their (possibly just assigned) schedule. Nested SDFGs take the parent
    schedule and are handed to ``_set_default_schedule_types``. Any other
    node exposing a truthy ``schedule`` attribute gets either the scope
    default or the parent schedule.

    :param state: State that contains the nodes; only used for the
                  dynamic-map-input query.
    :param parent_node: Key into ``reverse_scope_dict`` whose children are
                        processed.
    :param parent_schedule: Schedule of the enclosing scope.
    :param reverse_scope_dict: Mapping from a scope node to the nodes
                               directly inside it.
    :param use_parent_schedule: If True, children inherit ``parent_schedule``
                                as-is, except for ``Default``,
                                ``GPU_Default`` and ``GPU_Multidevice``,
                                which are still translated through
                                ``dtypes.SCOPEDEFAULT_SCHEDULE``.
    """
    for child in reverse_scope_dict[parent_node]:
        # Decide which schedule a child with a ``Default`` schedule receives.
        inherit_verbatim = use_parent_schedule and parent_schedule not in (
            dtypes.ScheduleType.Default,
            dtypes.ScheduleType.GPU_Default,
            dtypes.ScheduleType.GPU_Multidevice,
        )
        if inherit_verbatim:
            child_schedule = parent_schedule
        else:
            child_schedule = dtypes.SCOPEDEFAULT_SCHEDULE[parent_schedule]

        if isinstance(child, nodes.MapEntry):
            map_scope = child.map
            # Maps with dynamic inputs directly inside a GPU kernel are
            # forced to the dynamic thread-block schedule, whatever their
            # current schedule is.
            if (parent_schedule is dtypes.ScheduleType.GPU_Device
                    and has_dynamic_map_inputs(state, child)):
                map_scope.schedule = dtypes.ScheduleType.GPU_ThreadBlock_Dynamic
            if map_scope.schedule is dtypes.ScheduleType.Default:
                map_scope.schedule = child_schedule
            # Descend into the map with its resolved schedule.
            _set_default_schedule_in_scope(state, child, map_scope.schedule,
                                           reverse_scope_dict)
            continue

        if isinstance(child, nodes.ConsumeEntry):
            consume_scope = child.consume
            if consume_scope.schedule is dtypes.ScheduleType.Default:
                consume_scope.schedule = child_schedule
            # Descend into the consume scope with its resolved schedule.
            _set_default_schedule_in_scope(state, child,
                                           consume_scope.schedule,
                                           reverse_scope_dict)
            continue

        if isinstance(child, nodes.NestedSDFG):
            # Nested SDFGs keep the schedule of the scope they live in.
            if child.schedule is dtypes.ScheduleType.Default:
                child.schedule = parent_schedule
            if child.schedule is dtypes.ScheduleType.GPU_Multidevice:
                child.sdfg.openmp_sections = False
            _set_default_schedule_types(child.sdfg, child.schedule)
            continue

        # Remaining nodes: only those that expose a truthy ``schedule``
        # which is still ``Default`` are touched.
        if not getattr(child, 'schedule', False):
            continue
        if child.schedule is not dtypes.ScheduleType.Default:
            continue
        if isinstance(child, nodes.EntryNode) or parent_schedule is None:
            child.schedule = child_schedule
        else:
            child.schedule = parent_schedule
def can_be_applied(graph: SDFGState,
                   candidate: Dict[xf.PatternNode, int],
                   expr_index: int,
                   sdfg: SDFG,
                   permissive: bool = False) -> bool:
    """
    Check whether the matched access node qualifies for this transformation.

    The candidate is rejected when the access node is both read and written,
    already refers to a stream, does not use one of the accepted storage
    types, sits inside any scope (searching up through parent SDFGs), has a
    branching memlet path, has an innermost access that is not a single
    static element, passes through a map with dynamic inputs, or is attached
    to a scope whose label starts with ``'__s'``.

    :param graph: State in which the pattern was matched.
    :param candidate: Mapping from pattern nodes to node IDs in ``graph``.
    :param expr_index: 0 selects the outgoing (entry) side of the access
                       node, any other value the incoming (exit) side.
    :param sdfg: SDFG that owns ``graph``.
    :param permissive: Not consulted by this check.
    :return: True if every condition holds, False otherwise.
    """
    reading = expr_index == 0
    access = graph.node(candidate[StreamingMemory.access])

    # The container may be read or written in this state, never both.
    if graph.out_degree(access) > 0 and graph.in_degree(access) > 0:
        return False

    desc = sdfg.arrays[access.data]

    # Streams need no conversion.
    if isinstance(desc, data.Stream):
        return False

    # Only containers in the storage types listed here are accepted.
    accepted_storage = (
        dtypes.StorageType.CPU_Heap,
        dtypes.StorageType.CPU_Pinned,
        dtypes.StorageType.GPU_Global,
        dtypes.StorageType.FPGA_Global,
    )
    if desc.storage not in accepted_storage:
        return False

    # The access node must be scope-free all the way up the SDFG tree.
    scope_state, scope_node = graph, access
    while scope_state is not None:
        if scope_state.entry_node(scope_node) is not None:
            return False
        enclosing_nsdfg = scope_state.parent.parent_nsdfg_node
        if enclosing_nsdfg is None:
            break
        scope_node = enclosing_nsdfg
        scope_state = scope_state.parent.parent

    adjacent = graph.out_edges(access) if reading else graph.in_edges(access)
    for edge in adjacent:
        path = graph.memlet_path(edge)

        # A path as long as its memlet tree means the tree has no branches.
        if len(path) != len(list(graph.memlet_tree(edge))):
            return False

        # The innermost memlet must move exactly one element, statically.
        inner = path[-1] if reading else path[0]
        memlet = inner.data
        if (memlet.subset.num_elements() != 1 or memlet.dynamic
                or memlet.volume != 1):
            return False

        # Maps with dynamic ranges are rejected: they could work, but some
        # nodes (and perhaps tasklets) would need to be replicated, which is
        # difficult to track.
        for hop in path:
            scope = hop.dst if reading else graph.entry_node(hop.src)
            if (isinstance(scope, nodes.MapEntry)
                    and sdutil.has_dynamic_map_inputs(graph, scope)):
                return False

    # Skip if this memlet was already converted and this is the I/O
    # component.
    if reading:
        other = graph.node(candidate[StreamingMemory.entry])
    else:
        other = graph.entry_node(graph.node(candidate[StreamingMemory.exit]))
    return not other.label.startswith('__s')
def can_be_applied(self,
                   graph: SDFGState,
                   expr_index: int,
                   sdfg: SDFG,
                   permissive: bool = False) -> bool:
    """
    Check whether the matched access node qualifies for this transformation.

    The general checks reject an access node that is both read and written,
    already refers to a stream, does not use one of the accepted storage
    types, sits inside any scope (searching up through parent SDFGs), has a
    branching memlet path, has an innermost access that is not a single
    static element, passes through a map with dynamic inputs, or is attached
    to a scope whose label starts with ``'__s'``.

    When ``self.use_memory_buffering`` is set, the container must also use
    ``FPGA_Global`` storage, its element size must evenly divide (and not
    exceed) ``self.memory_buffering_target_bytes``, its last stride must be
    1 with all other integer strides divisible by the vector size, and the
    innermost map dimension must have unit step, a vector-size-aligned extent
    (when its end is an integer) and index the last array dimension exactly
    once.

    :param graph: State in which the pattern was matched.
    :param expr_index: 0 selects the outgoing (entry) side of the access
                       node, any other value the incoming (exit) side.
    :param sdfg: SDFG that owns ``graph``.
    :param permissive: Not consulted by this check.
    :return: True if every applicable condition holds, False otherwise.
    """
    access = self.access
    # Make sure the access node is only accessed once (read or write),
    # and not at the same time
    if graph.out_degree(access) > 0 and graph.in_degree(access) > 0:
        return False

    # If already a stream, skip
    if isinstance(sdfg.arrays[access.data], data.Stream):
        return False

    # If does not exist on off-chip memory, skip
    if sdfg.arrays[access.data].storage not in [
            dtypes.StorageType.CPU_Heap, dtypes.StorageType.CPU_Pinned,
            dtypes.StorageType.GPU_Global, dtypes.StorageType.FPGA_Global
    ]:
        return False

    # Only free nodes are allowed (search up the SDFG tree)
    curstate = graph
    node = access
    while curstate is not None:
        if curstate.entry_node(node) is not None:
            return False
        if curstate.parent.parent_nsdfg_node is None:
            break
        node = curstate.parent.parent_nsdfg_node
        curstate = curstate.parent.parent

    # Only one memlet path is allowed per outgoing/incoming edge
    edges = (graph.out_edges(access)
             if expr_index == 0 else graph.in_edges(access))
    for edge in edges:
        mpath = graph.memlet_path(edge)
        if len(mpath) != len(list(graph.memlet_tree(edge))):
            return False

        # The innermost end of the path must have a clearly defined memory
        # access pattern
        innermost_edge = mpath[-1] if expr_index == 0 else mpath[0]
        if (innermost_edge.data.subset.num_elements() != 1
                or innermost_edge.data.dynamic
                or innermost_edge.data.volume != 1):
            return False

        # Check if any of the maps has a dynamic range
        # These cases can potentially work but some nodes (and perhaps
        # tasklets) need to be replicated, which are difficult to track.
        for pe in mpath:
            node = pe.dst if expr_index == 0 else graph.entry_node(pe.src)
            if (isinstance(node, nodes.MapEntry)
                    and sdutil.has_dynamic_map_inputs(graph, node)):
                return False

    # If already applied on this memlet and this is the I/O component, skip
    if expr_index == 0:
        other_node = self.entry
    else:
        other_node = self.exit
        other_node = graph.entry_node(other_node)
    if other_node.label.startswith('__s'):
        return False

    # Everything below only concerns memory buffering.
    if not self.use_memory_buffering:
        return True

    desc = sdfg.arrays[access.data]

    # Array has to be global array
    if desc.storage != dtypes.StorageType.FPGA_Global:
        return False

    # Type has to divide target bytes
    if self.memory_buffering_target_bytes % desc.dtype.bytes != 0:
        return False

    # Target bytes has to be >= size of data type
    if self.memory_buffering_target_bytes < desc.dtype.bytes:
        return False

    strides = list(desc.strides)

    # Last stride has to be one (a container without strides cannot
    # satisfy this, so it is rejected instead of raising IndexError)
    if not strides or strides[-1] != 1:
        return False

    # Divisibility was established above, so floor division is exact and
    # avoids a round trip through float.
    vector_size = self.memory_buffering_target_bytes // desc.dtype.bytes

    # Other strides have to be divisible by vector size; symbolic strides
    # are not checked.
    for stride in strides[:-1]:
        if is_int(stride) and stride % vector_size != 0:
            return False

    # Check if map has the right access pattern
    # Stride 1 access by innermost loop, innermost loop counter has to be
    # divisible by vector size
    # Same code as in apply
    # NOTE(review): this part reads self.state_id / self.expr_index rather
    # than the graph / expr_index arguments -- presumably they refer to the
    # same state and expression; confirm against the caller.
    state = sdfg.node(self.state_id)
    dnode: nodes.AccessNode = self.access
    if self.expr_index == 0:
        edges = state.out_edges(dnode)
    else:
        edges = state.in_edges(dnode)

    # Group the edges by the ranges of the maps along their memlet paths.
    mapping: Dict[Tuple[subsets.Range],
                  List[gr.MultiConnectorEdge[mm.Memlet]]] = defaultdict(list)
    for edge in edges:
        mpath = state.memlet_path(edge)
        path_ranges = _collect_map_ranges(state, mpath)
        mapping[tuple(r[1] for r in path_ranges)].append(edge)

    for edges_with_same_range in mapping.values():
        for edge in edges_with_same_range:
            # Get memlet path and innermost edge. Nothing is modified here,
            # so the edge and the map parameters are read in place rather
            # than copied.
            mpath = state.memlet_path(edge)
            innermost_edge = mpath[-1] if self.expr_index == 0 else mpath[0]

            edge_subset = [dim[0] for dim in innermost_edge.data.subset]

            if self.expr_index == 0:
                scope_node = innermost_edge.src
            else:
                scope_node = innermost_edge.dst
            map_params = scope_node.map.params
            map_ranges = list(scope_node.map.range)

            # Correct ranges in map: innermost extent aligned to the vector
            # size (only checked for integer ends) and unit step.
            innermost_range = map_ranges[-1]
            if (is_int(innermost_range[1])
                    and (innermost_range[1] + 1) % vector_size != 0):
                return False
            if innermost_range[2] != 1:
                return False

            # Correct access in array: the last index is either the
            # innermost map parameter itself ...
            last_index = edge_subset[-1]
            innermost_param = map_params[-1]
            if (isinstance(last_index, symbol)
                    and str(last_index) == innermost_param):
                continue

            # ... or a sum in which that parameter appears exactly once.
            if not isinstance(last_index, sympy.core.add.Add):
                return False
            occurrences = sum(
                1 for arg in last_index.args
                if isinstance(arg, symbol) and str(arg) == innermost_param)
            if occurrences != 1:
                return False

    return True