def finish(self):
    """Assemble and return the kernel with all recorded changes applied.

    Every kernel instruction whose id appears in ``self.insns_to_update``
    is swapped for the recorded replacement; the instructions in
    ``self.insns_to_insert`` are appended after the existing ones, ordered
    by id.  The kernel's current iname tags and temporary variables are
    merged into ``self.updated_iname_to_tag`` and
    ``self.updated_temporary_variables`` (both are modified in place), and
    ``self.new_subdomain`` joins the domain list only if it has at least
    one set dimension.

    Returns the rebuilt kernel after "global" nosync directives have been
    added and ``assign_automatic_axes`` has been run on it.
    """
    import islpy as isl
    from itertools import product
    from loopy.kernel.tools import assign_automatic_axes, get_subkernels

    # Keyed by id, so a later entry with the same id replaces an earlier one.
    pending_by_id = {insn.id: insn for insn in self.insns_to_insert}

    rebuilt_insns = [
        self.insns_to_update[insn.id]
        if insn.id in self.insns_to_update
        else insn
        for insn in self.kernel.instructions
    ]
    rebuilt_insns += [pending_by_id[insn_id] for insn_id in sorted(pending_by_id)]

    self.updated_iname_to_tag.update(self.kernel.iname_to_tag)
    self.updated_temporary_variables.update(self.kernel.temporary_variables)

    domains = list(self.kernel.domains)
    if self.new_subdomain.dim(isl.dim_type.set) > 0:
        domains.append(self.new_subdomain)

    kernel = self.kernel.copy(
        domains=domains,
        instructions=rebuilt_insns,
        iname_to_tag=self.updated_iname_to_tag,
        temporary_variables=self.updated_temporary_variables,
        overridden_get_grid_sizes_for_insn_ids=None,
    )

    # Saves and reloads added to the same subkernel may end up linked by a
    # dependency chain; mark each (reload, save) pair as nosync.
    for subkernel in get_subkernels(kernel):
        added_ids = self.subkernel_to_newly_added_insn_ids[subkernel]

        for temporary, reload_ids in self.temporary_to_reload_ids.items():
            save_ids = self.temporary_to_save_ids[temporary]
            for source, sink in product(added_ids & reload_ids, added_ids & save_ids):
                kernel = lp.add_nosync(kernel, "global", source, sink)

    return assign_automatic_axes(kernel)
def finish(self):
    """Assemble and return the kernel with all recorded changes applied.

    Every kernel instruction whose id appears in ``self.insns_to_update``
    is swapped for the recorded replacement; the instructions in
    ``self.insns_to_insert`` are appended after the existing ones, ordered
    by id.  The kernel's current iname tags and temporary variables are
    merged into ``self.updated_iname_to_tags`` and
    ``self.updated_temporary_variables`` (both are modified in place), and
    ``self.new_subdomain`` joins the domain list only if it has at least
    one set dimension.

    Returns the rebuilt kernel after "global" nosync directives have been
    added and ``assign_automatic_axes`` has been run on it.
    """
    import islpy as isl
    from itertools import product
    from loopy.kernel.tools import assign_automatic_axes, get_subkernels

    # Keyed by id, so a later entry with the same id replaces an earlier one.
    pending_by_id = {insn.id: insn for insn in self.insns_to_insert}

    rebuilt_insns = [
        self.insns_to_update[insn.id]
        if insn.id in self.insns_to_update
        else insn
        for insn in self.kernel.instructions
    ]
    rebuilt_insns += [pending_by_id[insn_id] for insn_id in sorted(pending_by_id)]

    self.updated_iname_to_tags.update(self.kernel.iname_to_tags)
    self.updated_temporary_variables.update(self.kernel.temporary_variables)

    domains = list(self.kernel.domains)
    if self.new_subdomain.dim(isl.dim_type.set) > 0:
        domains.append(self.new_subdomain)

    kernel = self.kernel.copy(
        domains=domains,
        instructions=rebuilt_insns,
        iname_to_tags=self.updated_iname_to_tags,
        temporary_variables=self.updated_temporary_variables,
        overridden_get_grid_sizes_for_insn_ids=None,
    )

    # Saves and reloads added to the same subkernel may end up linked by a
    # dependency chain; mark each (reload, save) pair as nosync.
    for subkernel in get_subkernels(kernel):
        added_ids = self.subkernel_to_newly_added_insn_ids[subkernel]

        for temporary, reload_ids in self.temporary_to_reload_ids.items():
            save_ids = self.temporary_to_save_ids[temporary]
            for source, sink in product(added_ids & reload_ids, added_ids & save_ids):
                kernel = lp.add_nosync(kernel, "global", source, sink)

    return assign_automatic_axes(kernel)
def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
    """Raise if a subkernel reads a temporary that it never defines.

    For each subkernel of *kernel*, every temporary that is read but not
    written there is examined:

    * temporaries with an initializer are accepted;
    * temporaries with a ``base_storage`` are accepted only if some
      temporary written in the same subkernel has that same base storage;
    * remaining temporaries are rejected if their address space is
      private or local.

    Raises ``loopy.diagnostic.MissingDefinitionError`` on the first
    rejected temporary; otherwise returns ``None``.
    """
    from loopy.kernel.data import AddressSpace
    from loopy.kernel.tools import get_subkernels
    from loopy.schedule.tools import (
        temporaries_read_in_subkernel,
        temporaries_written_in_subkernel,
    )

    temporaries = kernel.temporary_variables
    needs_definition = (AddressSpace.PRIVATE, AddressSpace.LOCAL)

    for subkernel in get_subkernels(kernel):
        # Base storages that receive a write somewhere in this subkernel.
        written_storages = (
            temporaries[name].base_storage
            for name in temporaries_written_in_subkernel(kernel, subkernel)
        )
        defined_base_storage = {
            storage for storage in written_storages if storage is not None
        }

        read_but_not_written = (
            temporaries_read_in_subkernel(kernel, subkernel)
            - temporaries_written_in_subkernel(kernel, subkernel)
        )

        for temporary in read_but_not_written:
            tval = temporaries[temporary]

            if tval.initializer is not None:
                continue

            # For aliased temporaries, a write to any alias counts as a
            # definition.
            if tval.base_storage is not None:
                if tval.base_storage in defined_base_storage:
                    continue

                from loopy.diagnostic import MissingDefinitionError
                raise MissingDefinitionError(
                    "temporary variable '%s' gets "
                    "used in subkernel '%s' and neither it nor its "
                    "aliases have a definition" % (temporary, subkernel)
                )

            if tval.address_space in needs_definition:
                from loopy.diagnostic import MissingDefinitionError
                raise MissingDefinitionError(
                    "temporary variable '%s' gets used "
                    "in subkernel '%s' without a definition (maybe you forgot "
                    "to call loopy.save_and_reload_temporaries?)"
                    % (temporary, subkernel)
                )
def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
    """Raise if a subkernel reads a temporary that it never defines.

    For each subkernel of *kernel*, every temporary that is read but not
    written there is examined:

    * temporaries with an initializer are accepted;
    * temporaries with a ``base_storage`` are accepted only if some
      temporary written in the same subkernel has that same base storage;
    * remaining temporaries are rejected if their address space is
      private or local.

    Raises ``loopy.diagnostic.MissingDefinitionError`` on the first
    rejected temporary; otherwise returns ``None``.
    """
    from loopy.kernel.data import AddressSpace
    from loopy.kernel.tools import get_subkernels
    from loopy.schedule.tools import (
        temporaries_read_in_subkernel,
        temporaries_written_in_subkernel,
    )

    temporaries = kernel.temporary_variables
    needs_definition = (AddressSpace.PRIVATE, AddressSpace.LOCAL)

    for subkernel in get_subkernels(kernel):
        # Base storages that receive a write somewhere in this subkernel.
        written_storages = (
            temporaries[name].base_storage
            for name in temporaries_written_in_subkernel(kernel, subkernel)
        )
        defined_base_storage = {
            storage for storage in written_storages if storage is not None
        }

        read_but_not_written = (
            temporaries_read_in_subkernel(kernel, subkernel)
            - temporaries_written_in_subkernel(kernel, subkernel)
        )

        for temporary in read_but_not_written:
            tval = temporaries[temporary]

            if tval.initializer is not None:
                continue

            # For aliased temporaries, a write to any alias counts as a
            # definition.
            if tval.base_storage is not None:
                if tval.base_storage in defined_base_storage:
                    continue

                from loopy.diagnostic import MissingDefinitionError
                raise MissingDefinitionError(
                    "temporary variable '%s' gets "
                    "used in subkernel '%s' and neither it nor its "
                    "aliases have a definition" % (temporary, subkernel)
                )

            if tval.address_space in needs_definition:
                from loopy.diagnostic import MissingDefinitionError
                raise MissingDefinitionError(
                    "temporary variable '%s' gets used "
                    "in subkernel '%s' without a definition (maybe you forgot "
                    "to call loopy.save_and_reload_temporaries?)"
                    % (temporary, subkernel)
                )