def get_kernel(self):
    """Assemble and return the loopy kernel for this translation.

    For every ``icenter`` the kernel looks up ``src_ibox`` through
    ``qbx_center_to_target_box`` and ``target_boxes``. If that box number
    lies in ``[target_base_ibox, target_base_ibox + nboxes)``, its
    coefficients are read from ``expansions``, run through the instructions
    supplied by ``self.get_translation_loopy_insns()``, and added onto
    ``qbx_expansions[icenter, :]``.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied and all inames matching ``idim*`` were tagged
        ``unr``.
    """
    n_src_coeffs = len(self.src_expansion)
    n_tgt_coeffs = len(self.tgt_expansion)

    from sumpy.tools import gather_loopy_arguments

    domains = [
        "{[icenter]: 0<=icenter<ncenters}",
        "{[idim]: 0<=idim<dim}",
    ]

    # Loop header, box lookup and geometry setup.
    setup_insns = ["""
        for icenter
            <> isrc_box = qbx_center_to_target_box[icenter]

            # The box's expansions which we're translating here
            # (our source) is, globally speaking, a target box.

            <> src_ibox = target_boxes[isrc_box] \
                {id=read_src_ibox}

            # Is the box number on the level currently under
            # consideration?
            <> in_range = (target_base_ibox <= src_ibox \
                    and src_ibox < target_base_ibox + nboxes)

            if in_range
                <> tgt_center[idim] = qbx_centers[idim, icenter]
                <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                <> tgt_rscale = qbx_expansion_radii[icenter]
                <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim}

                """]

    # One load instruction per source coefficient.
    load_insns = ["""
                <> src_coeff{i} = \
                    expansions[src_ibox - target_base_ibox, {i}] \
                    {{dep=read_src_ibox}}
                """.format(i=i) for i in range(n_src_coeffs)]

    translation_insns = self.get_translation_loopy_insns()

    # One accumulating store per target coefficient.
    store_insns = ["""
                qbx_expansions[icenter, {i}] = \
                    qbx_expansions[icenter, {i}] + coeff{i} \
                    {{id_prefix=write_expn}}
                """.format(i=i) for i in range(n_tgt_coeffs)]

    closing_insns = ["""
            end
        end
        """]

    instructions = (
        setup_insns + load_insns + translation_insns
        + store_insns + closing_insns)

    kernel_data = [
        lp.GlobalArg("target_boxes", None, shape=None,
            offset=lp.auto),
        lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
        lp.ValueArg("src_rscale", None),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
            dim_tags="sep,c"),
        lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
        lp.ValueArg("naligned_boxes,target_base_ibox,nboxes", np.int32),
        lp.GlobalArg("expansions", None,
            shape=("nboxes", n_src_coeffs), offset=lp.auto),
        "..."
    ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion])

    knl = lp.make_kernel(
        domains,
        instructions,
        kernel_data,
        name=self.name,
        assumptions="ncenters>=1",
        silenced_warnings="write_race(write_expn*)",
        fixed_parameters={"dim": self.dim, "nchildren": 2**self.dim},
        lang_version=MOST_RECENT_LANGUAGE_VERSION)

    for expansion in (self.src_expansion, self.tgt_expansion):
        knl = expansion.prepare_loopy_kernel(knl)

    return lp.tag_inames(knl, "idim*:unr")
def get_kernel(self):
    """Build and return the loopy kernel for this translation.

    For every ``itgt_box`` the kernel reads ``tgt_ibox`` from
    ``target_boxes`` and its parent ``src_ibox`` from ``box_parent_ids``.
    It loads the parent's coefficients from ``src_expansions``, runs the
    instructions from ``self.get_translation_loopy_insns()``, and adds the
    resulting ``coeff{i}`` onto ``tgt_expansions[tgt_ibox - tgt_base_ibox]``.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied and all inames matching ``idim*`` were tagged
        ``unr``.
    :raises RuntimeError: if ``self.src_expansion`` and
        ``self.tgt_expansion`` are not the same object.
    """
    if self.src_expansion is not self.tgt_expansion:
        raise RuntimeError("%s requires that the source "
                "and target expansion are the same object"
                % self.default_name)

    ncoeffs = len(self.src_expansion)

    # To clarify terminology:
    #
    # isrc_box -> The index in a list of (in this case, source) boxes
    # src_ibox -> The (global) box number for the (in this case, source) box
    #
    # (same for itgt_box, tgt_ibox)

    from sumpy.tools import gather_loopy_arguments

    # NOTE: the tgt_center assignment deliberately has no trailing
    # backslash. Inside a (non-raw) Python string, backslash-newline is a
    # line splice and would glue the following instruction onto it.
    loopy_knl = lp.make_kernel(
            [
                "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
                "{[idim]: 0<=idim<dim}",
                ],
            ["""
            for itgt_box
                <> tgt_ibox = target_boxes[itgt_box]

                <> tgt_center[idim] = centers[idim, tgt_ibox]

                <> src_ibox = box_parent_ids[tgt_ibox] \
                    {id=read_src_ibox}

                <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                <> d[idim] = tgt_center[idim] - src_center[idim] {dup=idim}

                """] + ["""
                <> src_coeff{i} = \
                    src_expansions[src_ibox - src_base_ibox, {i}] \
                    {{id_prefix=read_expn,dep=read_src_ibox}}
                """.format(i=i) for i in range(ncoeffs)] + [
                ] + self.get_translation_loopy_insns() + ["""
                tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                    tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] + coeff{i} \
                    {{id_prefix=write_expn,nosync=read_expn*}}
                """.format(i=i) for i in range(ncoeffs)] + ["""
            end
            """],
            [
                lp.GlobalArg("target_boxes", None, shape=lp.auto,
                    offset=lp.auto),
                lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
                lp.ValueArg("src_rscale,tgt_rscale", None),
                lp.ValueArg("naligned_boxes,nboxes", np.int32),
                lp.ValueArg("tgt_base_ibox,src_base_ibox", np.int32),
                lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
                lp.GlobalArg("box_parent_ids", None, shape="nboxes"),
                lp.GlobalArg("tgt_expansions", None,
                    shape=("ntgt_level_boxes", ncoeffs), offset=lp.auto),
                lp.GlobalArg("src_expansions", None,
                    shape=("nsrc_level_boxes", ncoeffs), offset=lp.auto),
                "..."
            ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]),
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim),
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

    # Let each expansion register whatever it needs on the kernel.
    for expn in [self.src_expansion, self.tgt_expansion]:
        loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

    loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")

    return loopy_knl
def get_kernel(self):
    """Assemble and return the loopy kernel for this translation.

    For every ``icenter`` whose entry in
    ``qbx_center_to_target_box_source_level`` is not ``-1``, the kernel
    walks the entries of ``src_box_lists`` between ``src_box_starts[b]``
    and ``src_box_starts[b+1]`` (``b`` being that entry). For each such
    ``src_ibox`` it loads the coefficients from ``src_expansions`` and runs
    the instructions from ``self.get_translation_loopy_insns()``. The
    per-source results are summed with ``simul_reduce`` and added onto
    ``qbx_expansions[icenter, :]``.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied and all inames matching ``idim*`` were tagged
        ``unr``.
    """
    n_src_coeffs = len(self.src_expansion)
    n_tgt_coeffs = len(self.tgt_expansion)

    from sumpy.tools import gather_loopy_arguments

    domains = [
        "{[icenter]: 0<=icenter<ncenters}",
        "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
        "{[idim]: 0<=idim<dim}",
    ]

    # Outer loop, validity check and start of the source-box loop.
    setup_insns = ["""
        for icenter
            <> icontaining_tgt_box = \
                qbx_center_to_target_box_source_level[icenter]

            if icontaining_tgt_box != -1
                <> tgt_center[idim] = qbx_centers[idim, icenter] \
                        {id=fetch_tgt_center}
                <> tgt_rscale = qbx_expansion_radii[icenter]

                <> isrc_start = src_box_starts[icontaining_tgt_box]
                <> isrc_stop = src_box_starts[icontaining_tgt_box+1]

                for isrc_box
                    <> src_ibox = src_box_lists[isrc_box] \
                            {id=read_src_ibox}
                    <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                    <> d[idim] = tgt_center[idim] - src_center[idim] \
                        {dup=idim}
                    """]

    # One load instruction per source coefficient.
    load_insns = ["""
                    <> src_coeff{i} = \
                        src_expansions[src_ibox - src_base_ibox, {i}] \
                        {{dep=read_src_ibox}}
                    """.format(i=i) for i in range(n_src_coeffs)]

    translation_insns = self.get_translation_loopy_insns()

    end_src_loop = ["""
                end
                """]

    # One reducing, accumulating store per target coefficient.
    store_insns = ["""
                qbx_expansions[icenter, {i}] = \
                    qbx_expansions[icenter, {i}] + \
                    simul_reduce(sum, isrc_box, coeff{i}) \
                    {{id_prefix=write_expn}}
                """.format(i=i) for i in range(n_tgt_coeffs)]

    closing_insns = ["""
            end
        end
        """]

    instructions = (
        setup_insns + load_insns + translation_insns
        + end_src_loop + store_insns + closing_insns)

    kernel_data = [
        lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
        lp.ValueArg("src_rscale", None),
        lp.GlobalArg("src_box_starts, src_box_lists",
            None, shape=None, strides=(1,)),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
            dim_tags="sep,c"),
        lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
        lp.ValueArg("aligned_nboxes,nsrc_level_boxes", np.int32),
        lp.ValueArg("src_base_ibox", np.int32),
        lp.GlobalArg("src_expansions", None,
            shape=("nsrc_level_boxes", n_src_coeffs), offset=lp.auto),
        lp.GlobalArg("qbx_expansions", None,
            shape=("ncenters", n_tgt_coeffs)),
        "..."
    ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion])

    knl = lp.make_kernel(
        domains,
        instructions,
        kernel_data,
        name=self.name,
        assumptions="ncenters>=1",
        silenced_warnings="write_race(write_expn*)",
        fixed_parameters={"dim": self.dim},
        lang_version=MOST_RECENT_LANGUAGE_VERSION)

    for expansion in (self.src_expansion, self.tgt_expansion):
        knl = expansion.prepare_loopy_kernel(knl)

    return lp.tag_inames(knl, "idim*:unr")
def get_kernel(self):
    """Build and return the loopy kernel for this translation.

    For every ``itgt_box`` the kernel reads ``tgt_ibox`` from
    ``target_boxes`` and walks the entries of ``src_box_lists`` between
    ``src_box_starts[itgt_box]`` and ``src_box_starts[itgt_box+1]``. For
    each such ``src_ibox`` it loads the coefficients from
    ``src_expansions`` and runs the instructions from
    ``self.get_translation_loopy_insns()``. The per-source results are
    summed with ``simul_reduce`` and *assigned* (not added) to
    ``tgt_expansions[tgt_ibox - tgt_base_ibox, :]``.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied and all inames matching ``idim*`` were tagged
        ``unr``.
    """
    ncoeff_src = len(self.src_expansion)
    ncoeff_tgt = len(self.tgt_expansion)

    # To clarify terminology:
    #
    # isrc_box -> The index in a list of (in this case, source) boxes
    # src_ibox -> The (global) box number for the (in this case, source) box
    #
    # (same for itgt_box, tgt_ibox)

    from sumpy.tools import gather_loopy_arguments

    # NOTE: the tgt_center assignment deliberately has no trailing
    # backslash. Inside a (non-raw) Python string, backslash-newline is a
    # line splice and would glue the following instruction onto it.
    loopy_knl = lp.make_kernel(
            [
                "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
                "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
                "{[idim]: 0<=idim<dim}",
                ],
            ["""
            for itgt_box
                <> tgt_ibox = target_boxes[itgt_box]

                <> tgt_center[idim] = centers[idim, tgt_ibox]

                <> isrc_start = src_box_starts[itgt_box]
                <> isrc_stop = src_box_starts[itgt_box+1]

                for isrc_box
                    <> src_ibox = src_box_lists[isrc_box] \
                            {id=read_src_ibox}

                    <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                    <> d[idim] = tgt_center[idim] - src_center[idim] \
                        {dup=idim}

                    """] + ["""
                    <> src_coeff{coeffidx} = \
                        src_expansions[src_ibox - src_base_ibox, {coeffidx}] \
                        {{dep=read_src_ibox}}
                    """.format(coeffidx=i) for i in range(ncoeff_src)] + [
                    ] + self.get_translation_loopy_insns() + ["""
                end
                """] + ["""
                tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \
                    simul_reduce(sum, isrc_box, coeff{coeffidx}) \
                    {{id_prefix=write_expn}}
                """.format(coeffidx=i) for i in range(ncoeff_tgt)] + ["""
            end
            """],
            [
                lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
                lp.ValueArg("src_rscale,tgt_rscale", None),
                lp.GlobalArg("src_box_starts, src_box_lists",
                    None, shape=None, strides=(1,), offset=lp.auto),
                lp.ValueArg("aligned_nboxes,tgt_base_ibox,src_base_ibox",
                    np.int32),
                lp.ValueArg("nsrc_level_boxes,ntgt_level_boxes",
                    np.int32),
                lp.GlobalArg("src_expansions", None,
                    shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto),
                lp.GlobalArg("tgt_expansions", None,
                    shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto),
                "..."
            ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]),
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)",
            default_offset=lp.auto,
            fixed_parameters=dict(dim=self.dim),
            lang_version=MOST_RECENT_LANGUAGE_VERSION
            )

    # Let each expansion register whatever it needs on the kernel.
    for expn in [self.src_expansion, self.tgt_expansion]:
        loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

    # The "idim*" wildcard already covers plain "idim" as well as its
    # {dup=idim} copies, so a separate tag_inames call for "idim" is not
    # needed.
    loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")

    return loopy_knl
def get_kernel(self):
    """Assemble and return the loopy kernel for this translation.

    For every ``icenter`` the kernel looks up ``src_ibox`` through
    ``qbx_center_to_target_box`` and ``target_boxes``, reads that box's
    coefficients from ``expansions``, and adds the ``coeff*`` values
    produced by ``self.get_translation_loopy_insns()`` onto
    ``qbx_expansions[icenter, :]``. The per-coefficient instructions are
    generated through the ``SRC_COEFFIDX`` / ``TGT_COEFFIDX`` defines.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied, ``idim`` was duplicated within
        ``fetch_tgt_center``, and ``idim`` was tagged ``unr``.
    """
    n_src_coeffs = len(self.src_expansion)
    n_tgt_coeffs = len(self.tgt_expansion)

    from sumpy.tools import gather_loopy_arguments

    domains = [
        "{[icenter]: 0<=icenter<ncenters}",
        "{[idim]: 0<=idim<dim}",
    ]

    instructions = self.get_translation_loopy_insns() + ["""
        <> isrc_box = qbx_center_to_target_box[icenter]

        # The box's expansions which we're translating here
        # (our source) is, globally speaking, a target box.

        <> src_ibox = target_boxes[isrc_box] \
            {id=read_src_ibox}

        <> tgt_center[idim] = qbx_centers[idim, icenter] \
            {id=fetch_tgt_center}
        <> src_center[idim] = centers[idim, src_ibox] \
            {id=fetch_src_center}

        <> d[idim] = tgt_center[idim] - src_center[idim]

        <> src_coeff${SRC_COEFFIDX} = \
            expansions[src_ibox, ${SRC_COEFFIDX}] \
            {dep=read_src_ibox}

        qbx_expansions[icenter, ${TGT_COEFFIDX}] = \
            qbx_expansions[icenter, ${TGT_COEFFIDX}] \
            + coeff${TGT_COEFFIDX} \
            {id_prefix=write_expn}
        """]

    kernel_data = [
        lp.GlobalArg("target_boxes", None, shape=None,
            offset=lp.auto),
        lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
            dim_tags="sep,c"),
        lp.ValueArg("naligned_boxes,nboxes", np.int32),
        lp.GlobalArg("expansions", None,
            shape=("nboxes", n_src_coeffs)),
        "..."
    ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion])

    knl = lp.make_kernel(
        domains,
        instructions,
        kernel_data,
        name=self.name,
        assumptions="ncenters>=1",
        defines={
            "dim": self.dim,
            "nchildren": 2**self.dim,
            "SRC_COEFFIDX": list(map(str, range(n_src_coeffs))),
            "TGT_COEFFIDX": list(map(str, range(n_tgt_coeffs))),
        },
        silenced_warnings="write_race(write_expn*)")

    for expansion in (self.src_expansion, self.tgt_expansion):
        knl = expansion.prepare_loopy_kernel(knl)

    knl = lp.duplicate_inames(knl, "idim", "fetch_tgt_center",
            tags={"idim": "unr"})

    return lp.tag_inames(knl, {"idim": "unr"})
def get_kernel(self):
    """Assemble and return the loopy kernel for this translation.

    For every ``icenter`` the kernel reads ``icontaining_tgt_box`` from
    ``qbx_center_to_target_box`` and iterates ``isrc_box`` over the range
    given by ``src_box_starts`` at that index and the next one. Each
    ``src_ibox`` taken from ``src_box_lists`` contributes ``coeff*`` values
    produced by ``self.get_translation_loopy_insns()``. Their sum over
    ``isrc_box`` is assigned to ``qbx_expansions[icenter, :]``. The
    per-coefficient instructions are generated through the
    ``SRC_COEFFIDX`` / ``TGT_COEFFIDX`` defines.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied, ``idim`` was duplicated within
        ``fetch_tgt_center``, and ``idim`` was tagged ``unr``.
    """
    n_src_coeffs = len(self.src_expansion)
    n_tgt_coeffs = len(self.tgt_expansion)

    from sumpy.tools import gather_loopy_arguments

    domains = [
        "{[icenter]: 0<=icenter<ncenters}",
        "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
        "{[idim]: 0<=idim<dim}",
    ]

    instructions = self.get_translation_loopy_insns() + ["""
        <> icontaining_tgt_box = qbx_center_to_target_box[icenter]

        <> tgt_center[idim] = qbx_centers[idim, icenter] \
            {id=fetch_tgt_center}

        <> isrc_start = src_box_starts[icontaining_tgt_box]
        <> isrc_stop = src_box_starts[icontaining_tgt_box+1]

        <> src_ibox = src_box_lists[isrc_box] \
            {id=read_src_ibox}
        <> src_center[idim] = centers[idim, src_ibox] \
            {id=fetch_src_center}

        <> d[idim] = tgt_center[idim] - src_center[idim]

        <> src_coeff${SRC_COEFFIDX} = \
            src_expansions[src_ibox, ${SRC_COEFFIDX}] \
            {dep=read_src_ibox}

        qbx_expansions[icenter, ${TGT_COEFFIDX}] = \
            sum(isrc_box, coeff${TGT_COEFFIDX}) \
            {id_prefix=write_expn}
        """]

    kernel_data = [
        lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
        lp.GlobalArg("src_box_starts, src_box_lists",
            None, shape=None, strides=(1,)),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
            dim_tags="sep,c"),
        lp.ValueArg("aligned_nboxes,nboxes", np.int32),
        lp.GlobalArg("src_expansions", None,
            shape=("nboxes", n_src_coeffs)),
        lp.GlobalArg("qbx_expansions", None,
            shape=("ncenters", n_tgt_coeffs)),
        "..."
    ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion])

    knl = lp.make_kernel(
        domains,
        instructions,
        kernel_data,
        name=self.name,
        assumptions="ncenters>=1",
        defines={
            "dim": self.dim,
            "SRC_COEFFIDX": list(map(str, range(n_src_coeffs))),
            "TGT_COEFFIDX": list(map(str, range(n_tgt_coeffs))),
        },
        silenced_warnings="write_race(write_expn*)")

    for expansion in (self.src_expansion, self.tgt_expansion):
        knl = expansion.prepare_loopy_kernel(knl)

    knl = lp.duplicate_inames(knl, "idim", "fetch_tgt_center",
            tags={"idim": "unr"})

    return lp.tag_inames(knl, {"idim": "unr"})
def get_kernel(self):
    """Build and return the loopy kernel for this translation.

    For every ``itgt_box`` the kernel reads ``tgt_ibox`` from
    ``target_boxes`` and iterates ``isrc_box`` over the ``nchildren``
    (fixed to ``2**self.dim``) rows of ``box_child_ids[:, tgt_ibox]``.
    Entries equal to ``0`` are skipped via ``is_src_box_valid``. For the
    remaining ``src_ibox`` values the coefficients are loaded from
    ``src_expansions``, translated by the instructions from
    ``self.get_translation_loopy_insns()``, and added onto
    ``tgt_expansions[tgt_ibox - tgt_base_ibox, :]``.

    :returns: the kernel after ``prepare_loopy_kernel`` of both expansions
        has been applied and all inames matching ``idim*`` were tagged
        ``unr``.
    :raises RuntimeError: if ``self.src_expansion`` and
        ``self.tgt_expansion`` are not the same object.
    """
    if self.src_expansion is not self.tgt_expansion:
        raise RuntimeError("%s requires that the source "
                "and target expansion are the same object"
                % type(self).__name__)

    ncoeffs = len(self.src_expansion)

    # To clarify terminology:
    #
    # isrc_box -> The index in a list of (in this case, source) boxes
    # src_ibox -> The (global) box number for the (in this case, source) box
    #
    # (same for itgt_box, tgt_ibox)

    # The translation instructions are pre-built instruction objects, so
    # the textual "if is_src_box_valid" block cannot guard them; attach the
    # predicate explicitly and give them ids the write instructions can
    # depend on.
    loopy_insns = [
        insn.copy(
            predicates=insn.predicates | frozenset(["is_src_box_valid"]),
            id=lp.UniqueName("compute_coeff"))
        for insn in self.get_translation_loopy_insns()
    ]

    from sumpy.tools import gather_loopy_arguments

    # NOTE: the tgt_center assignment deliberately has no trailing
    # backslash. Inside a (non-raw) Python string, backslash-newline is a
    # line splice and would glue the following "for isrc_box" onto it.
    loopy_knl = lp.make_kernel(
        [
            "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
            "{[isrc_box]: 0<=isrc_box<nchildren}",
            "{[idim]: 0<=idim<dim}",
        ],
        [
            """
            for itgt_box
                <> tgt_ibox = target_boxes[itgt_box]

                <> tgt_center[idim] = centers[idim, tgt_ibox]

                for isrc_box
                    <> src_ibox = box_child_ids[isrc_box,tgt_ibox] \
                            {id=read_src_ibox}
                    <> is_src_box_valid = src_ibox != 0

                    if is_src_box_valid
                        <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                        <> d[idim] = tgt_center[idim] - src_center[idim] \
                            {dup=idim}

                        """
        ] + [
            """
                        <> src_coeff{i} = \
                            src_expansions[src_ibox - src_base_ibox, {i}] \
                            {{id_prefix=read_coeff,dep=read_src_ibox}}
                        """.format(i=i) for i in range(ncoeffs)
        ] + [] + loopy_insns + [
            """
                        tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                            tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] \
                            + coeff{i} \
                            {{id_prefix=write_expn,dep=compute_coeff*, nosync=read_coeff*}}
                        """.format(i=i) for i in range(ncoeffs)
        ] + [
            """
                    end
                end
            end
            """
        ],
        [
            lp.GlobalArg(
                "target_boxes", None, shape=lp.auto, offset=lp.auto),
            lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
            lp.ValueArg("src_rscale,tgt_rscale", None),
            lp.GlobalArg(
                "box_child_ids", None, shape="nchildren, aligned_nboxes"),
            lp.GlobalArg("tgt_expansions", None,
                shape=("ntgt_level_boxes", ncoeffs), offset=lp.auto),
            lp.GlobalArg("src_expansions", None,
                shape=("nsrc_level_boxes", ncoeffs), offset=lp.auto),
            lp.ValueArg("src_base_ibox,tgt_base_ibox", np.int32),
            lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
            lp.ValueArg("aligned_nboxes", np.int32),
            "..."
        ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]),
        name=self.name,
        assumptions="ntgt_boxes>=1",
        silenced_warnings="write_race(write_expn*)",
        fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim))

    # Let each expansion register whatever it needs on the kernel.
    for expn in [self.src_expansion, self.tgt_expansion]:
        loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

    loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")

    return loopy_knl
def get_kernel(self):
    """Build and return the loopy kernel for this translation.

    For every ``itgt_box`` the kernel reads ``tgt_ibox`` from
    ``target_boxes`` and iterates ``isrc_box`` over the ``nchildren``
    (fixed to ``2**self.dim``) rows of ``box_child_ids[:, tgt_ibox]``.
    Entries equal to ``0`` are skipped via ``is_src_box_valid``. For the
    remaining ``src_ibox`` values the coefficients are loaded from
    ``src_expansions``, translated by the instructions from
    ``self.get_translation_loopy_insns()``, and added onto
    ``tgt_expansions[tgt_ibox - tgt_base_ibox, :]``.

    :returns: the kernel after ``dim`` and ``nchildren`` were fixed,
        ``prepare_loopy_kernel`` of both expansions has been applied, and
        all inames matching ``idim*`` were tagged ``unr``.
    :raises RuntimeError: if ``self.src_expansion`` and
        ``self.tgt_expansion`` are not the same object.
    """
    if self.src_expansion is not self.tgt_expansion:
        raise RuntimeError("%s requires that the source "
                "and target expansion are the same object"
                % type(self).__name__)

    ncoeffs = len(self.src_expansion)

    # To clarify terminology:
    #
    # isrc_box -> The index in a list of (in this case, source) boxes
    # src_ibox -> The (global) box number for the (in this case, source) box
    #
    # (same for itgt_box, tgt_ibox)

    # The translation instructions are pre-built instruction objects, so
    # the textual "if is_src_box_valid" block cannot guard them; attach the
    # predicate explicitly and give them ids the write instructions can
    # depend on.
    loopy_insns = [
            insn.copy(
                predicates=insn.predicates | frozenset(["is_src_box_valid"]),
                id=lp.UniqueName("compute_coeff"))
            for insn in self.get_translation_loopy_insns()]

    from sumpy.tools import gather_loopy_arguments

    # NOTE: the tgt_center assignment deliberately has no trailing
    # backslash. Inside a (non-raw) Python string, backslash-newline is a
    # line splice and would glue the following "for isrc_box" onto it.
    loopy_knl = lp.make_kernel(
            [
                "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
                "{[isrc_box]: 0<=isrc_box<nchildren}",
                "{[idim]: 0<=idim<dim}",
                ],
            ["""
            for itgt_box
                <> tgt_ibox = target_boxes[itgt_box]

                <> tgt_center[idim] = centers[idim, tgt_ibox]

                for isrc_box
                    <> src_ibox = box_child_ids[isrc_box,tgt_ibox] \
                            {id=read_src_ibox}
                    <> is_src_box_valid = src_ibox != 0

                    if is_src_box_valid
                        <> src_center[idim] = centers[idim, src_ibox] {dup=idim}
                        <> d[idim] = tgt_center[idim] - src_center[idim] \
                            {dup=idim}

                        """] + ["""
                        <> src_coeff{i} = \
                            src_expansions[src_ibox - src_base_ibox, {i}] \
                            {{id_prefix=read_coeff,dep=read_src_ibox}}
                        """.format(i=i) for i in range(ncoeffs)] + [
                        ] + loopy_insns + ["""
                        tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \
                            tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] \
                            + coeff{i} \
                            {{id_prefix=write_expn,dep=compute_coeff*, nosync=read_coeff*}}
                        """.format(i=i) for i in range(ncoeffs)] + ["""
                    end
                end
            end
            """],
            [
                lp.GlobalArg("target_boxes", None, shape=lp.auto,
                    offset=lp.auto),
                lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
                lp.GlobalArg("box_child_ids", None,
                    shape="nchildren, aligned_nboxes"),
                lp.GlobalArg("tgt_expansions", None,
                    shape=("ntgt_level_boxes", ncoeffs), offset=lp.auto),
                lp.GlobalArg("src_expansions", None,
                    shape=("nsrc_level_boxes", ncoeffs), offset=lp.auto),
                lp.ValueArg("src_base_ibox,tgt_base_ibox", np.int32),
                lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32),
                lp.ValueArg("aligned_nboxes", np.int32),
                "..."
            ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]),
            name=self.name,
            assumptions="ntgt_boxes>=1",
            silenced_warnings="write_race(write_expn*)")

    loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim,
            nchildren=2**self.dim)

    # Let each expansion register whatever it needs on the kernel.
    for expn in [self.src_expansion, self.tgt_expansion]:
        loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

    loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")

    return loopy_knl