def get_successor_relation(self):
    """Return a dict mapping each schedule index to its successor indices.

    The schedule is visited from its last item to its first, so that when
    the following item is an ``EnterLoop`` the entry for that loop's other
    boundary has already been recorded and can be reused.

    :returns: a :class:`dict` whose keys are indices into ``self.schedule``
        and whose values are :class:`set` instances of schedule indices.
        The last schedule item maps to the empty set unless it is a
        ``LeaveLoop``.
    :raises LoopyError: if a schedule item is of an unrecognized type.
    """
    # NOTE(review): the result of get_block_boundaries is used as a lookup
    # from a loop boundary's index to the index of the matching boundary
    # (enter -> leave and leave -> enter) -- confirm against its definition.
    block_bounds = get_block_boundaries(self.kernel.schedule)

    schedule = self.schedule
    # The trailing None stands for "nothing follows the last item".
    padded = schedule + [None]

    single_successor_types = (
            LeaveLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier)
    other_known_types = (
            EnterLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier)

    successors = {}

    for sched_idx in range(len(schedule) - 1, -1, -1):
        item = padded[sched_idx]
        next_idx = sched_idx + 1
        next_item = padded[next_idx]

        # Successors contributed by the item that follows.
        if next_item is None:
            after = set()
        elif isinstance(next_item, EnterLoop):
            # The loop may be empty, in which case control continues with
            # whatever follows the loop's end.
            after = successors[block_bounds[next_idx]] | {next_idx}
        elif isinstance(next_item, single_successor_types):
            after = {next_idx}
        else:
            raise LoopyError(
                    "unexpected type of schedule item: {ty}".format(
                        ty=type(next_item).__name__))

        # Successors contributed by the item itself.
        if isinstance(item, LeaveLoop):
            # The end of a loop may be followed by the loop's beginning.
            after.add(block_bounds[sched_idx])
        elif not isinstance(item, other_known_types):
            raise LoopyError(
                    "unexpected type of schedule item: {ty}".format(
                        ty=type(item).__name__))

        successors[sched_idx] = after

    return successors
def get_successor_relation(self):
    """Compute, for every schedule index, the indices that may follow it.

    Items are processed in reverse schedule order. Each item is paired with
    the item after it (``None`` for the last one); the resulting set holds
    the index of that following item, plus

    * the already-computed entry of the loop's other boundary when the
      following item is an ``EnterLoop`` (the loop may be empty), and
    * the index of the loop's other boundary when the item itself is a
      ``LeaveLoop``.

    :returns: a :class:`dict` from schedule index to a :class:`set` of
        schedule indices.
    :raises LoopyError: if a schedule item has an unexpected type.
    """

    def unexpected(sched_item):
        # Build (but do not raise) the error for an unrecognized item.
        return LoopyError("unexpected type of schedule item: {ty}"
                .format(ty=type(sched_item).__name__))

    result = {}

    # NOTE(review): presumably maps each loop boundary index to the index
    # of its matching boundary -- verify against get_block_boundaries.
    bounds = get_block_boundaries(self.kernel.schedule)

    sched = self.schedule
    count = len(sched)

    # Pair every item with the one after it, walking back to front.
    pairs = zip(reversed(sched), reversed(sched + [None]))

    for offset, (current, following) in enumerate(pairs, start=1):
        position = count - offset
        following_pos = position + 1

        # --- contribution of the following item ---
        if following is None:
            followers = set()
        elif isinstance(following, EnterLoop):
            followers = set(result[bounds[following_pos]])
            followers.add(following_pos)
        elif isinstance(following, (LeaveLoop, RunInstruction, CallKernel,
                ReturnFromKernel, Barrier)):
            followers = {following_pos}
        else:
            raise unexpected(following)

        # --- contribution of the current item ---
        if isinstance(current, LeaveLoop):
            followers.add(bounds[position])
        elif not isinstance(current, (EnterLoop, RunInstruction, CallKernel,
                ReturnFromKernel, Barrier)):
            raise unexpected(current)

        result[position] = followers

    return result
def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen):
    """Return a copy of *kernel* whose schedule is split into kernel calls.

    Runs of schedule items are bracketed by a ``CallKernel`` /
    ``ReturnFromKernel`` pair. A ``Barrier`` whose ``synchronization_kind``
    is ``"global"`` is emitted outside of any such pair, and so are the
    ``EnterLoop`` / ``LeaveLoop`` items of every loop whose body (at any
    nesting depth) contains such a barrier. When nothing forces a split,
    the entire schedule is wrapped in a single call/return pair.

    :arg kernel: object providing a ``schedule`` sequence and a
        ``copy(schedule=...)`` method.
    :arg device_prog_name_gen: zero-argument callable; it is invoked once
        per emitted ``CallKernel`` and its result is used as the name of
        that call and of the ``ReturnFromKernel`` that follows it.
    :returns: the result of ``kernel.copy(schedule=new_schedule)``.
    :raises LoopyError: if the schedule holds an item that is not a
        ``RunInstruction``, ``EnterLoop`` or ``Barrier`` at a position
        where a new item is expected.
    """
    schedule = kernel.schedule
    # NOTE(review): used below to look up, from the index of an EnterLoop,
    # the index of its matching LeaveLoop -- confirm against
    # get_block_boundaries.
    loop_bounds = get_block_boundaries(schedule)

    # Nameless placeholders; real names are filled in by the final pass.
    call_template = CallKernel(kernel_name="", extra_args=[], extra_inames=[])
    return_template = ReturnFromKernel(kernel_name="")

    def as_kernel_call(items):
        # Bracket *items* (a list) with fresh copies of the placeholders.
        return [call_template.copy()] + items + [return_template.copy()]

    # {{{ inner mapper function

    def inner_mapper(start_idx, end_idx, new_schedule):
        # Process schedule[start_idx..end_idx] (inclusive), appending the
        # mapped items to *new_schedule*. Returns True if this range had
        # to be split around a global barrier.
        needs_split = False
        pending = []
        pos = start_idx

        while pos <= end_idx:
            item = schedule[pos]

            if isinstance(item, RunInstruction):
                pending.append(item)
                pos += 1
                continue

            if isinstance(item, EnterLoop):
                loop_end = loop_bounds[pos]
                body = []
                body_needs_split = inner_mapper(pos + 1, loop_end - 1, body)
                whole_loop = [schedule[pos]] + body + [schedule[loop_end]]
                pos = loop_end + 1

                if not body_needs_split:
                    # The loop stays part of the chunk being collected.
                    pending.extend(whole_loop)
                    continue

                # The body was split: flush what was collected so far and
                # emit the loop outside of any kernel call.
                needs_split = True
                if pending:
                    new_schedule.extend(as_kernel_call(pending))
                new_schedule.extend(whole_loop)
                pending = []
                continue

            if not isinstance(item, Barrier):
                raise LoopyError("unexpected type of schedule item: %s"
                        % type(item).__name__)

            if item.synchronization_kind == "global":
                # Wrap the current chunk into a kernel call.
                needs_split = True
                if pending:
                    new_schedule.extend(as_kernel_call(pending))
                new_schedule.append(item)
                pending = []
            else:
                pending.append(item)
            pos += 1

        if pending and needs_split:
            # Wrap remainder of schedule into a kernel call.
            new_schedule.extend(as_kernel_call(pending))
        else:
            new_schedule.extend(pending)

        return needs_split

    # }}}

    mapped = []
    if not inner_mapper(0, len(schedule) - 1, mapped):
        # Wrap everything into a kernel call.
        mapped = as_kernel_call(mapped)

    # Assign names, extra_inames to CallKernel / ReturnFromKernel instructions
    enclosing_inames = []

    for idx, item in enumerate(mapped):
        if isinstance(item, CallKernel):
            last_kernel_name = device_prog_name_gen()
            mapped[idx] = item.copy(
                    kernel_name=last_kernel_name,
                    extra_inames=list(enclosing_inames))
        elif isinstance(item, ReturnFromKernel):
            mapped[idx] = item.copy(kernel_name=last_kernel_name)
        elif isinstance(item, EnterLoop):
            enclosing_inames.append(item.iname)
        elif isinstance(item, LeaveLoop):
            enclosing_inames.pop()

    return kernel.copy(schedule=mapped)