def get_default_src_tgt_arguments(self):
    """Return the default loopy arguments describing sources and targets.

    The list declares the ``sources`` and ``targets`` arrays (shaped
    ``(self.dim, nsources)`` and ``(self.dim, ntargets)``), the two count
    values, a ``target_to_source`` array when ``self.exclude_self`` is set,
    and finally whatever ``gather_loopy_source_arguments`` yields for
    ``self.kernels``.
    """
    from sumpy.tools import gather_loopy_source_arguments

    point_args = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources")),
        lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")),
        lp.ValueArg("nsources", None),
        lp.ValueArg("ntargets", None),
    ]

    # The target->source map is only declared when self-interactions are
    # to be excluded.
    if self.exclude_self:
        self_map_args = [
            lp.GlobalArg("target_to_source", None, shape=("ntargets", ))]
    else:
        self_map_args = []

    return (point_args
            + self_map_args
            + gather_loopy_source_arguments(self.kernels))
def get_kernel(self):
    """Assemble the loopy kernel that accumulates one expansion per source box.

    The generated code loops over ``source_boxes``; for each box it walks the
    box's sources, runs the instructions from ``get_loopy_instructions()``
    and sum-reduces ``strength*coeff<i>`` over ``isrc`` into row
    ``src_ibox-tgt_base_ibox`` of ``tgt_expansions``.

    :returns: the kernel after ``self.expansion.prepare_loopy_kernel`` and
        after tagging the ``idim*`` inames as unrolled.
    """
    ncoeffs = len(self.expansion)

    from sumpy.tools import gather_loopy_source_arguments

    domains = [
        "{[isrc_box]: 0<=isrc_box<nsrc_boxes}",
        "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}",
    ]

    # Per-box setup and the opening of the per-source loop.
    loop_head = ["""
        for isrc_box
            <> src_ibox = source_boxes[isrc_box]
            <> isrc_start = box_source_starts[src_ibox]
            <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

            <> center[idim] = centers[idim, src_ibox] {id=fetch_center}

            for isrc
                <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim}
                <> strength = strengths[isrc]
        """]
    coeff_insns = self.get_loopy_instructions()
    close_source_loop = ["""
            end
        """]

    # One store per expansion coefficient.
    store_template = """
            tgt_expansions[src_ibox-tgt_base_ibox, {coeffidx}] = \
                simul_reduce(sum, isrc, strength*coeff{coeffidx}) \
                    {{id_prefix=write_expn}}
        """
    stores = [store_template.format(coeffidx=i) for i in range(ncoeffs)]
    close_box_loop = ["""
        end
        """]

    instructions = (
        loop_head + coeff_insns + close_source_loop + stores + close_box_loop)

    kernel_args = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                     dim_tags="sep,c"),
        lp.GlobalArg("strengths", None, shape="nsources"),
        lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                     None, shape=None),
        lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
        lp.ValueArg("rscale", None),
        lp.GlobalArg("tgt_expansions", None,
                     shape=("nboxes", ncoeffs), offset=lp.auto),
        lp.ValueArg("nboxes,aligned_nboxes,tgt_base_ibox", np.int32),
        lp.ValueArg("nsources", np.int32),
        "..."
    ] + gather_loopy_source_arguments([self.expansion])

    kernel = lp.make_kernel(
        domains,
        instructions,
        kernel_args,
        name=self.name,
        assumptions="nsrc_boxes>=1",
        silenced_warnings="write_race(write_expn*)",
        default_offset=lp.auto,
        fixed_parameters=dict(dim=self.dim),
        lang_version=MOST_RECENT_LANGUAGE_VERSION)

    kernel = self.expansion.prepare_loopy_kernel(kernel)
    return lp.tag_inames(kernel, "idim*:unr")
def get_default_src_tgt_arguments(self):
    """Return the default loopy arguments for sources, targets and centers.

    ``src``, ``tgt`` and ``center`` are declared as C-ordered arrays with
    ``self.dim`` rows; ``expansion_radii`` has one entry per target. The
    result ends with the arguments gathered from ``self.kernels``.
    """
    from sumpy.tools import gather_loopy_source_arguments

    # (array name, name of the size of its second axis)
    point_arrays = (
        ("src", "nsources"),
        ("tgt", "ntargets"),
        ("center", "ntargets"),
    )
    geometry_args = [
        lp.GlobalArg(array_name, None, shape=(self.dim, count_name),
                     order="C")
        for array_name, count_name in point_arrays]

    geometry_args.append(
        lp.GlobalArg("expansion_radii", None, shape="ntargets"))
    geometry_args.append(lp.ValueArg("nsources", None))
    geometry_args.append(lp.ValueArg("ntargets", None))

    return geometry_args + gather_loopy_source_arguments(self.kernels)
def get_default_src_tgt_arguments(self):
    """Build the default source/target argument declarations.

    Declares ``sources``/``targets`` with ``self.dim`` rows and their
    counts, optionally ``target_to_source`` (only if ``self.exclude_self``),
    followed by the arguments gathered from ``self.kernels``.
    """
    from sumpy.tools import gather_loopy_source_arguments

    arguments = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources")),
        lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")),
        lp.ValueArg("nsources", None),
        lp.ValueArg("ntargets", None),
    ]
    if self.exclude_self:
        arguments.append(
            lp.GlobalArg("target_to_source", None, shape=("ntargets",)))

    return arguments + gather_loopy_source_arguments(self.kernels)
def get_default_src_tgt_arguments(self):
    """Return the default loopy arguments for sources, targets and centers.

    ``src`` and ``tgt`` are declared C-ordered, while ``center`` carries the
    ``"sep,C"`` dim tags. ``expansion_radii`` has one entry per target, and
    the arguments gathered from ``self.kernels`` come last.
    """
    from sumpy.tools import gather_loopy_source_arguments

    src_arg = lp.GlobalArg(
        "src", None, shape=(self.dim, "nsources"), order="C")
    tgt_arg = lp.GlobalArg(
        "tgt", None, shape=(self.dim, "ntargets"), order="C")
    center_arg = lp.GlobalArg(
        "center", None, shape=(self.dim, "ntargets"), dim_tags="sep,C")
    radii_arg = lp.GlobalArg("expansion_radii", None, shape="ntargets")

    geometry_args = [
        src_arg,
        tgt_arg,
        center_arg,
        radii_arg,
        lp.ValueArg("nsources", None),
        lp.ValueArg("ntargets", None),
    ]
    return geometry_args + gather_loopy_source_arguments(self.kernels)
def get_kernel(self):
    """Build the loopy kernel evaluating results at a subset of targets.

    For every ``itgt`` the generated code looks up a center index in
    ``qbx_center_numbers`` and an overall target index in
    ``qbx_tgt_numbers``, forms the ``a`` and ``b`` vectors relative to that
    center, and stores ``knl_<i>_scaling`` times the sum over ``isrc`` of
    ``pair_result_<i>`` into ``result_<i>[itgt_overall]``.

    :returns: the kernel with ``idim*`` inames tagged as unrolled and each
        expansion's ``prepare_loopy_kernel`` applied.
    """
    loopy_insns, result_names = self.get_loopy_insns_and_result_names()
    kernel_exprs = self.get_kernel_exprs(result_names)

    from sumpy.tools import gather_loopy_source_arguments

    source_args = gather_loopy_source_arguments(self.kernels)
    geometry_args = [
        lp.GlobalArg("src", None,
                     shape=(self.dim, "nsources"), order="C"),
        lp.GlobalArg("tgt", None,
                     shape=(self.dim, "ntargets_total"), order="C"),
        lp.GlobalArg("center", None,
                     shape=(self.dim, "ncenters_total"), dim_tags="sep,C"),
        lp.GlobalArg("expansion_radii", None, shape="ncenters_total"),
        lp.GlobalArg("qbx_tgt_numbers", None, shape="ntargets"),
        lp.GlobalArg("qbx_center_numbers", None, shape="ntargets"),
        lp.ValueArg("nsources", np.int32),
        lp.ValueArg("ntargets", np.int32),
        lp.ValueArg("ntargets_total", np.int32),
        lp.ValueArg("ncenters_total", np.int32),
    ]
    strength_args = [
        lp.GlobalArg("strength_%d" % i, None, shape="nsources", order="C")
        for i in range(self.strength_count)]
    result_args = [
        lp.GlobalArg("result_%d" % i, self.value_dtypes[i],
                     shape="ntargets_total", order="C")
        for i in range(len(self.kernels))]
    arguments = source_args + geometry_args + strength_args + result_args

    domains = [
        "{[itgt]: 0 <= itgt < ntargets}",
        "{[isrc]: 0 <= isrc < nsources}",
        "{[idim]: 0 <= idim < dim}"
    ]

    # Index lookups plus the center-relative vectors for one pair.
    pair_setup = ["""
            <> icenter = qbx_center_numbers[itgt]
            <> itgt_overall = qbx_tgt_numbers[itgt]

            <> a[idim] = center[idim, icenter] - src[idim, isrc] \
                    {dup=idim}
            <> b[idim] = tgt[idim, itgt_overall] - center[idim, icenter] \
                    {dup=idim}
            <> rscale = expansion_radii[icenter]
        """]
    store_template = """
            result_{i}[itgt_overall] = knl_{i}_scaling * \
                simul_reduce(sum, isrc, pair_result_{i})  \
                    {{inames=itgt}}
        """
    instructions = (
        self.get_kernel_scaling_assignments()
        + ["for itgt, isrc"]
        + pair_setup
        + loopy_insns
        + kernel_exprs
        + [store_template.format(i=iknl)
           for iknl in range(len(self.expansions))]
        + ["end"])

    kernel = lp.make_kernel(
        domains,
        instructions,
        arguments,
        name=self.name,
        assumptions="ntargets>=1 and nsources>=1",
        fixed_parameters=dict(dim=self.dim))

    kernel = lp.tag_inames(kernel, "idim*:unr")
    for expansion in self.expansions:
        kernel = expansion.prepare_loopy_kernel(kernel)

    return kernel
def get_kernel(self):
    """Create the loopy kernel for target/center pairs given by index lists.

    ``qbx_tgt_numbers`` and ``qbx_center_numbers`` map each ``itgt`` to an
    overall target index and a center index. The kernel sums
    ``pair_result_<i>`` over all sources, scales by ``knl_<i>_scaling`` and
    writes to ``result_<i>[itgt_overall]``.

    :returns: the resulting kernel, with ``idim*`` inames unroll-tagged and
        passed through every expansion's ``prepare_loopy_kernel``.
    """
    loopy_insns, result_names = self.get_loopy_insns_and_result_names()
    kernel_exprs = self.get_kernel_exprs(result_names)

    from sumpy.tools import gather_loopy_source_arguments

    arguments = gather_loopy_source_arguments(self.kernels) + [
        lp.GlobalArg("src", None, shape=(self.dim, "nsources"), order="C"),
        lp.GlobalArg("tgt", None, shape=(self.dim, "ntargets_total"),
                     order="C"),
        lp.GlobalArg("center", None, shape=(self.dim, "ncenters_total"),
                     dim_tags="sep,C"),
        lp.GlobalArg("expansion_radii", None, shape="ncenters_total"),
        lp.GlobalArg("qbx_tgt_numbers", None, shape="ntargets"),
        lp.GlobalArg("qbx_center_numbers", None, shape="ntargets"),
        lp.ValueArg("nsources", np.int32),
        lp.ValueArg("ntargets", np.int32),
        lp.ValueArg("ntargets_total", np.int32),
        lp.ValueArg("ncenters_total", np.int32),
    ]
    # One strength input per strength slot, one output per kernel.
    arguments = arguments + [
        lp.GlobalArg("strength_%d" % istrength, None,
                     shape="nsources", order="C")
        for istrength in range(self.strength_count)]
    arguments = arguments + [
        lp.GlobalArg("result_%d" % iresult, self.value_dtypes[iresult],
                     shape="ntargets_total", order="C")
        for iresult in range(len(self.kernels))]

    instructions = self.get_kernel_scaling_assignments() + ["for itgt, isrc"]
    instructions = instructions + ["""
            <> icenter = qbx_center_numbers[itgt]
            <> itgt_overall = qbx_tgt_numbers[itgt]

            <> a[idim] = center[idim, icenter] - src[idim, isrc] \
                    {dup=idim}
            <> b[idim] = tgt[idim, itgt_overall] - center[idim, icenter] \
                    {dup=idim}
            <> rscale = expansion_radii[icenter]
        """]
    instructions = instructions + loopy_insns + kernel_exprs
    instructions = instructions + ["""
            result_{i}[itgt_overall] = knl_{i}_scaling * \
                simul_reduce(sum, isrc, pair_result_{i})  \
                    {{inames=itgt}}
        """.format(i=iknl) for iknl in range(len(self.expansions))]
    instructions = instructions + ["end"]

    built = lp.make_kernel(
        [
            "{[itgt]: 0 <= itgt < ntargets}",
            "{[isrc]: 0 <= isrc < nsources}",
            "{[idim]: 0 <= idim < dim}"
        ],
        instructions,
        arguments,
        name=self.name,
        assumptions="ntargets>=1 and nsources>=1",
        fixed_parameters=dict(dim=self.dim))

    built = lp.tag_inames(built, "idim*:unr")
    for expn in self.expansions:
        built = expn.prepare_loopy_kernel(built)

    return built
def get_kernel(self):
    """Build the loopy kernel for pairwise sums driven by per-box lists.

    For each target box, the generated code walks the source boxes listed
    between ``source_box_starts[itgt_box]`` and
    ``source_box_starts[itgt_box+1]`` in ``source_box_lists`` and adds
    ``knl_<k>_scaling`` times the sum over ``isrc`` of ``pair_result_<k>``
    to ``result[k, itgt]``. With ``self.exclude_self`` set, pairs for which
    ``isrc == target_to_source[itgt]`` contribute zero.

    :returns: the kernel with ``idim*`` inames unroll-tagged, ``strength``
        tagged ``"sep,C"``, and every kernel's ``prepare_loopy_kernel``
        applied.
    """
    loopy_insns, result_names = self.get_loopy_insns_and_result_names()

    from pymbolic import var

    pair_exprs = [
        var(name) * var("strength").index(
            (self.strength_usage[i], var("isrc")))
        for i, name in enumerate(result_names)]

    if self.exclude_self:
        from pymbolic.primitives import If, Variable
        pair_exprs = [
            If(Variable("is_self"), 0, pair_expr)
            for pair_expr in pair_exprs]

    from sumpy.tools import gather_loopy_source_arguments

    domains = [
        "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_end}",
        "{[itgt,isrc,idim]: \
                itgt_start<=itgt<itgt_end and \
                isrc_start<=isrc<isrc_end and \
                0<=idim<dim }",
    ]

    box_loops = ["""
        for itgt_box
            <> tgt_ibox = target_boxes[itgt_box]
            <> itgt_start = box_target_starts[tgt_ibox]
            <> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox]

            <> isrc_box_start = source_box_starts[itgt_box]
            <> isrc_box_end = source_box_starts[itgt_box+1]

            for isrc_box
                <> src_ibox = source_box_lists[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                for itgt
                for isrc
                    <> d[idim] = \
                            targets[idim,itgt] - sources[idim,isrc] \
                            {dup=idim}
        """]

    # An empty instruction string stands in when self-exclusion is off.
    if self.exclude_self:
        self_check = ["""
                    <> is_self = (isrc == target_to_source[itgt])
                    """]
    else:
        self_check = [""]

    pair_assignments = [
        lp.Assignment(id=None,
                      assignee="pair_result_%d" % i, expression=pair_expr,
                      temp_var_type=lp.auto)
        for i, pair_expr in enumerate(pair_exprs)]

    accumulate_template = """
                    result[KNLIDX, itgt] = result[KNLIDX, itgt] + \
                        knl_KNLIDX_scaling \
                        * simul_reduce(sum, isrc, pair_result_KNLIDX)
                """
    accumulations = [
        accumulate_template.replace("KNLIDX", str(iknl))
        for iknl in range(len(pair_exprs))]

    instructions = (
        self.get_kernel_scaling_assignments()
        + box_loops
        + self_check
        + loopy_insns
        + pair_assignments
        + ["""
                end
                """]
        + accumulations
        + ["""
                end
            end
        end
        """])

    kernel_args = [
        lp.GlobalArg(
            "box_target_starts,box_target_counts_nonchild,"
            "box_source_starts,box_source_counts_nonchild,",
            None, shape=None),
        lp.GlobalArg(
            "source_box_starts, source_box_lists,",
            None, shape=None),
        lp.GlobalArg("strength", None, shape="nstrengths,nsources"),
        lp.GlobalArg("result", None,
                     shape="nkernels,ntargets", dim_tags="sep,c"),
        lp.GlobalArg("targets", None,
                     shape="dim,ntargets", dim_tags="sep,c"),
        lp.GlobalArg("sources", None,
                     shape="dim,nsources", dim_tags="sep,c"),
        lp.ValueArg("nsources", np.int32),
        lp.ValueArg("ntargets", np.int32),
        "...",
    ]
    if self.exclude_self:
        kernel_args = kernel_args + [
            lp.GlobalArg("target_to_source", np.int32, shape=("ntargets", ))]
    kernel_args = kernel_args + gather_loopy_source_arguments(self.kernels)

    kernel = lp.make_kernel(
        domains,
        instructions,
        kernel_args,
        name=self.name,
        assumptions="ntgt_boxes>=1",
        fixed_parameters=dict(
            dim=self.dim,
            nstrengths=self.strength_count,
            nkernels=len(self.kernels)))

    kernel = lp.tag_inames(kernel, "idim*:unr")
    kernel = lp.tag_array_axes(kernel, "strength", "sep,C")

    for knl in self.kernels:
        kernel = knl.prepare_loopy_kernel(kernel)

    return kernel
def get_kernel(self):
    """Build the loopy kernel summing over all source/target pairs.

    For each target the kernel stores ``knl_<k>_scaling`` times the sum over
    ``isrc`` of ``pair_result_<k>`` into ``result[k, itgt]``. Each pair
    result is a kernel value multiplied by the entry of ``strength``
    selected through ``self.strength_usage``. With ``self.exclude_self``
    set, pairs for which ``isrc == target_to_source[itgt]`` contribute zero.

    :returns: the kernel with ``idim*`` inames unroll-tagged, every kernel's
        ``prepare_loopy_kernel`` applied, and ``strength`` tagged ``"sep,C"``.
    """
    loopy_insns, result_names = self.get_loopy_insns_and_result_names()

    from pymbolic import var

    pair_exprs = [
        var(name) * var("strength").index(
            (self.strength_usage[i], var("isrc")))
        for i, name in enumerate(result_names)]

    if self.exclude_self:
        from pymbolic.primitives import If, Variable
        pair_exprs = [
            If(Variable("is_self"), 0, pair_expr)
            for pair_expr in pair_exprs]

    from sumpy.tools import gather_loopy_source_arguments

    domain = "{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources \
            and 0<=idim<dim}"

    open_loops = ["""
        for itgt
            for isrc
        """]
    distance = ["""
                <> d[idim] = targets[idim,itgt] - sources[idim,isrc]
        """]
    # An empty instruction string stands in when self-exclusion is off.
    if self.exclude_self:
        self_check = ["""
                <> is_self = (isrc == target_to_source[itgt])
                """]
    else:
        self_check = [""]

    pair_assignments = [
        lp.Assignment(id=None,
                      assignee="pair_result_%d" % i, expression=pair_expr,
                      temp_var_type=lp.auto)
        for i, pair_expr in enumerate(pair_exprs)]

    store_template = """
            result[KNLIDX, itgt] = knl_KNLIDX_scaling \
                * simul_reduce(sum, isrc, pair_result_KNLIDX)
        """
    stores = [
        store_template.replace("KNLIDX", str(iknl))
        for iknl in range(len(pair_exprs))]

    instructions = (
        self.get_kernel_scaling_assignments()
        + open_loops
        + loopy_insns
        + distance
        + self_check
        + pair_assignments
        + ["""
            end
        """]
        + stores
        + ["""
        end
        """])

    kernel_args = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources")),
        lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")),
        lp.ValueArg("nsources", None),
        lp.ValueArg("ntargets", None),
        lp.GlobalArg("strength", None, shape="nstrengths,nsources"),
        lp.GlobalArg("result", None,
                     shape="nresults,ntargets", dim_tags="sep,C")
    ]
    if self.exclude_self:
        kernel_args = kernel_args + [
            lp.GlobalArg("target_to_source", np.int32, shape=("ntargets", ))]
    kernel_args = kernel_args + gather_loopy_source_arguments(self.kernels)

    kernel = lp.make_kernel(
        domain,
        instructions,
        kernel_args,
        name=self.name,
        assumptions="nsources>=1 and ntargets>=1",
        fixed_parameters=dict(
            dim=self.dim,
            nstrengths=self.strength_count,
            nresults=len(self.kernels)))

    kernel = lp.tag_inames(kernel, "idim*:unr")

    for knl in self.kernels:
        kernel = knl.prepare_loopy_kernel(kernel)

    return lp.tag_array_axes(kernel, "strength", "sep,C")
def get_kernel(self):
    """Build the loopy kernel for pairwise sums driven by per-box lists.

    For each target box, the source boxes listed in ``source_box_lists``
    between ``source_box_starts[itgt_box]`` and
    ``source_box_starts[itgt_box+1]`` are visited, and ``knl_<k>_scaling``
    times the sum over ``isrc`` of ``pair_result_<k>`` is added to
    ``result[k, itgt]``.

    :returns: the kernel with ``dim``, ``nstrengths`` and ``nkernels`` fixed,
        ``idim*`` inames unroll-tagged, ``strength`` tagged ``"sep,C"`` and
        every kernel's ``prepare_loopy_kernel`` applied.
    """
    loopy_insns, result_names = self.get_loopy_insns_and_result_names()

    from pymbolic import var

    pair_exprs = [
        var(name) * var("strength").index(
            (self.strength_usage[i], var("isrc")))
        for i, name in enumerate(result_names)]

    from sumpy.tools import gather_loopy_source_arguments

    domains = [
        "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_end}",
        "{[itgt,isrc,idim]: \
                itgt_start<=itgt<itgt_end and \
                isrc_start<=isrc<isrc_end and \
                0<=idim<dim }",
    ]

    box_loops = ["""
        for itgt_box
            <> tgt_ibox = target_boxes[itgt_box]
            <> itgt_start = box_target_starts[tgt_ibox]
            <> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox]

            <> isrc_box_start = source_box_starts[itgt_box]
            <> isrc_box_end = source_box_starts[itgt_box+1]

            for isrc_box
                <> src_ibox = source_box_lists[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                for itgt
                for isrc
                    <> d[idim] = \
                            targets[idim,itgt] - sources[idim,isrc] \
                            {dup=idim}
        """]

    pair_assignments = [
        lp.Assignment(id=None,
                      assignee="pair_result_%d" % i, expression=pair_expr,
                      temp_var_type=lp.auto)
        for i, pair_expr in enumerate(pair_exprs)]

    accumulate_template = """
                    result[KNLIDX, itgt] = result[KNLIDX, itgt] + \
                        knl_KNLIDX_scaling \
                        * simul_reduce(sum, isrc, pair_result_KNLIDX)
                """
    accumulations = [
        accumulate_template.replace("KNLIDX", str(iknl))
        for iknl in range(len(pair_exprs))]

    instructions = (
        self.get_kernel_scaling_assignments()
        + box_loops
        + loopy_insns
        + pair_assignments
        + ["""
                end
                """]
        + accumulations
        + ["""
                end
            end
        end
        """])

    kernel_args = [
        lp.GlobalArg("box_target_starts,box_target_counts_nonchild,"
                     "box_source_starts,box_source_counts_nonchild,",
                     None, shape=None),
        lp.GlobalArg("source_box_starts, source_box_lists,",
                     None, shape=None),
        lp.GlobalArg("strength", None, shape="nstrengths,nsources"),
        lp.GlobalArg("result", None,
                     shape="nkernels,ntargets", dim_tags="sep,c"),
        lp.GlobalArg("targets", None,
                     shape="dim,ntargets", dim_tags="sep,c"),
        lp.GlobalArg("sources", None,
                     shape="dim,nsources", dim_tags="sep,c"),
        lp.ValueArg("nsources", np.int32),
        lp.ValueArg("ntargets", np.int32),
        "...",
    ] + gather_loopy_source_arguments(self.kernels)

    kernel = lp.make_kernel(
        domains,
        instructions,
        kernel_args,
        name=self.name,
        assumptions="ntgt_boxes>=1")

    # Sizes known at build time are baked in after kernel creation.
    kernel = lp.fix_parameters(
        kernel,
        dim=self.dim,
        nstrengths=self.strength_count,
        nkernels=len(self.kernels))

    kernel = lp.tag_inames(kernel, "idim*:unr")
    kernel = lp.tag_array_axes(kernel, "strength", "sep,C")

    for knl in self.kernels:
        kernel = knl.prepare_loopy_kernel(kernel)

    return kernel
def get_kernel(self):
    """Build the loopy kernel forming expansions at the listed centers.

    For each entry of ``global_qbx_centers`` the generated code finds the
    center's target box via ``qbx_center_to_target_box``, visits the source
    boxes listed for that box, and sum-reduces ``strength*coeff<i>`` over
    ``(isrc_box, isrc)`` into ``qbx_expansions[tgt_icenter, i]``.

    :returns: the kernel after ``self.expansion.prepare_loopy_kernel`` and
        unroll-tagging of the ``idim*`` inames.
    """
    ncoeffs = len(self.expansion)

    from sumpy.tools import gather_loopy_source_arguments

    arguments = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                     dim_tags="sep,c"),
        lp.GlobalArg("strengths", None, shape="nsources"),
        lp.GlobalArg("qbx_center_to_target_box", None, shape=None),
        lp.GlobalArg("source_box_starts,source_box_lists",
                     None, shape=None),
        lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                     None, shape=None),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                     dim_tags="sep,c"),
        lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
        lp.GlobalArg("qbx_expansions", None,
                     shape=("ncenters", ncoeffs)),
        lp.ValueArg("ncenters", np.int32),
        lp.ValueArg("nsources", np.int32),
        "..."
    ] + gather_loopy_source_arguments([self.expansion])

    domains = [
        "{[itgt_center]: 0<=itgt_center<ntgt_centers}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
        "{[isrc]: isrc_start<=isrc<isrc_end}",
        "{[idim]: 0<=idim<dim}",
    ]

    loop_head = ["""
        for itgt_center
            <> tgt_icenter = global_qbx_centers[itgt_center]

            <> center[idim] = qbx_centers[idim, tgt_icenter] {dup=idim}
            <> rscale = qbx_expansion_radii[tgt_icenter]

            <> itgt_box = qbx_center_to_target_box[tgt_icenter]

            <> isrc_box_start = source_box_starts[itgt_box]
            <> isrc_box_stop = source_box_starts[itgt_box+1]

            for isrc_box
                <> src_ibox = source_box_lists[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                for isrc
                    <> a[idim] = center[idim] - sources[idim, isrc] \
                            {dup=idim}
                    <> strength = strengths[isrc]
        """]
    coeff_insns = self.get_loopy_instructions()
    close_source_loops = ["""
                end
            end
        """]

    # One store per expansion coefficient.
    store_template = """
            qbx_expansions[tgt_icenter, {i}] = \
                simul_reduce(sum, (isrc_box, isrc), strength*coeff{i}) \
                {{id_prefix=write_expn}}
        """
    stores = [store_template.format(i=i) for i in range(ncoeffs)]
    close_center_loop = ["""
        end
        """]

    kernel = lp.make_kernel(
        domains,
        (loop_head + coeff_insns + close_source_loops
         + stores + close_center_loop),
        arguments,
        name=self.name,
        assumptions="ntgt_centers>=1",
        silenced_warnings="write_race(write_expn*)",
        fixed_parameters=dict(dim=self.dim),
        lang_version=MOST_RECENT_LANGUAGE_VERSION)

    kernel = self.expansion.prepare_loopy_kernel(kernel)
    return lp.tag_inames(kernel, "idim*:unr")
def get_kernel(self):
    """Build the loopy kernel forming expansions at the listed centers.

    The instructions are written without explicit loops. The store into
    ``qbx_expansions[tgt_icenter, ${COEFFIDX}]`` sums ``strength*coeff``
    over ``(isrc_box, isrc)``, and the ``COEFFIDX`` define takes the values
    ``"0"`` through ``str(ncoeffs-1)``.

    :returns: the kernel after ``self.expansion.prepare_loopy_kernel``,
        duplication of ``idim`` within ``fetch_center`` and unroll-tagging
        of ``idim``.
    """
    ncoeffs = len(self.expansion)

    from sumpy.tools import gather_loopy_source_arguments

    arguments = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                     dim_tags="sep,c"),
        lp.GlobalArg("strengths", None, shape="nsources"),
        lp.GlobalArg("qbx_center_to_target_box", None, shape=None),
        lp.GlobalArg("source_box_starts,source_box_lists",
                     None, shape=None),
        lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                     None, shape=None),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                     dim_tags="sep,c"),
        lp.GlobalArg("qbx_expansions", None,
                     shape=("ncenters", ncoeffs)),
        lp.ValueArg("ncenters", np.int32),
        lp.ValueArg("nsources", np.int32),
        "..."
    ] + gather_loopy_source_arguments([self.expansion])

    domains = [
        "{[itgt_center]: 0<=itgt_center<ntgt_centers}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
        "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}",
    ]

    instructions = self.get_loopy_instructions() + ["""
        <> tgt_icenter = global_qbx_centers[itgt_center]
        <> itgt_box = qbx_center_to_target_box[tgt_icenter]

        <> isrc_box_start = source_box_starts[itgt_box]
        <> isrc_box_stop = source_box_starts[itgt_box+1]

        <> src_ibox = source_box_lists[isrc_box]
        <> isrc_start = box_source_starts[src_ibox]
        <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

        <> center[idim] = qbx_centers[idim, tgt_icenter] \
                {id=fetch_center}
        <> a[idim] = center[idim] - sources[idim, isrc] {id=compute_a}
        <> strength = strengths[isrc]
        qbx_expansions[tgt_icenter, ${COEFFIDX}] = \
                sum((isrc_box, isrc), strength*coeff${COEFFIDX}) \
                {id_prefix=write_expn}
        """]

    # Values substituted for the dim and COEFFIDX placeholders.
    defines = dict(
        dim=self.dim,
        COEFFIDX=[str(icoeff) for icoeff in range(ncoeffs)],
    )

    kernel = lp.make_kernel(
        domains,
        instructions,
        arguments,
        name=self.name,
        assumptions="ntgt_centers>=1",
        defines=defines,
        silenced_warnings="write_race(write_expn*)")

    kernel = self.expansion.prepare_loopy_kernel(kernel)
    kernel = lp.duplicate_inames(
        kernel, "idim", "fetch_center", tags={"idim": "unr"})
    return lp.tag_inames(kernel, dict(idim="unr"))
def get_kernel(self):
    """Generate the loopy kernel from symbolically expanded expressions.

    Each expansion in ``self.expansions`` is expanded in the symbolic
    vectors ``a`` and ``b``. After global common-subexpression elimination
    the assignments are converted to loopy instructions. Each
    ``pair_result_<i>`` is the corresponding result multiplied by
    ``get_strength_or_not(isrc, i)``.

    :returns: the kernel with ``idim*`` inames unroll-tagged, every
        expansion's ``prepare_loopy_kernel`` applied, and ``center`` tagged
        ``"sep,C"``.
    """
    from sumpy.symbolic import make_sym_vector

    avec = make_sym_vector("a", self.dim)
    bvec = make_sym_vector("b", self.dim)

    from sumpy.assignment_collection import SymbolicAssignmentCollection
    sac = SymbolicAssignmentCollection()

    logger.info("compute expansion expressions: start")

    result_names = [
        expand(iexpn, sac, expn, avec, bvec)
        for iexpn, expn in enumerate(self.expansions)]

    logger.info("compute expansion expressions: done")

    sac.run_global_cse()

    code_transformers = [
        expn.kernel.get_code_transformer() for expn in self.expansions]

    from sumpy.codegen import to_loopy_insns
    loopy_insns = to_loopy_insns(
        six.iteritems(sac.assignments),
        vector_names={"a", "b"},
        pymbolic_expr_maps=code_transformers,
        retain_names=result_names,
        complex_dtype=np.complex128  # FIXME
    )

    isrc_sym = var("isrc")
    pair_exprs = [
        var(name) * self.get_strength_or_not(isrc_sym, i)
        for i, name in enumerate(result_names)]

    from sumpy.tools import gather_loopy_source_arguments
    arguments = (
        self.get_src_tgt_arguments()
        + self.get_input_and_output_arguments()
        + gather_loopy_source_arguments(self.kernels))

    domain = (
        "{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources "
        "and 0<=idim<dim}")

    instructions = (
        self.get_kernel_scaling_assignments()
        + ["for itgt, isrc"]
        + [self.get_compute_a_and_b_vecs()]
        + loopy_insns
        + [
            # zip() limits the count to the shorter of the two sequences;
            # the dtype itself is not used here.
            lp.Assignment(id=None,
                          assignee="pair_result_%d" % i, expression=pair_expr,
                          temp_var_type=lp.auto)
            for i, (pair_expr, _dtype) in enumerate(
                zip(pair_exprs, self.value_dtypes))
        ]
        + ["end"]
        + self.get_result_store_instructions())

    kernel = lp.make_kernel(
        domain,
        instructions,
        arguments,
        name=self.name,
        assumptions="nsources>=1 and ntargets>=1",
        default_offset=lp.auto,
        silenced_warnings="write_race(write_lpot*)",
        fixed_parameters=dict(dim=self.dim))

    kernel = lp.tag_inames(kernel, "idim*:unr")
    for expn in self.expansions:
        kernel = expn.prepare_loopy_kernel(kernel)

    return lp.tag_array_axes(kernel, "center", "sep,C")
def get_kernel(self):
    """Generate the loopy kernel from symbolically expanded expressions.

    Each expansion in ``self.expansions`` is expanded in the symbolic
    vectors ``a`` and ``b``. After global common-subexpression elimination,
    ``tau`` is substituted by 0 in every assignment and trivial assignments
    are removed (result names are retained) before conversion to loopy
    instructions.

    :returns: the kernel with ``dim`` fixed, ``idim*`` inames unroll-tagged,
        every expansion's ``prepare_loopy_kernel`` applied, and ``center``
        tagged ``"sep,C"``.
    """
    from sumpy.symbolic import make_sympy_vector

    avec = make_sympy_vector("a", self.dim)
    bvec = make_sympy_vector("b", self.dim)

    from sumpy.assignment_collection import SymbolicAssignmentCollection
    sac = SymbolicAssignmentCollection()

    logger.info("compute expansion expressions: start")

    result_names = [
        expand(iexpn, sac, expn, avec, bvec)
        for iexpn, expn in enumerate(self.expansions)]

    logger.info("compute expansion expressions: done")

    sac.run_global_cse()

    from sumpy.symbolic import kill_trivial_assignments
    substituted = [
        (name, expr.subs("tau", 0))
        for name, expr in six.iteritems(sac.assignments)]
    assignments = kill_trivial_assignments(
        substituted, retain_names=result_names)

    code_transformers = [
        expn.kernel.get_code_transformer() for expn in self.expansions]

    from sumpy.codegen import to_loopy_insns
    loopy_insns = to_loopy_insns(
        assignments,
        vector_names={"a", "b"},
        pymbolic_expr_maps=code_transformers,
        complex_dtype=np.complex128  # FIXME
    )

    isrc_sym = var("isrc")
    pair_exprs = [
        var(name) * self.get_strength_or_not(isrc_sym, i)
        for i, name in enumerate(result_names)]

    from sumpy.tools import gather_loopy_source_arguments
    arguments = (
        self.get_src_tgt_arguments()
        + self.get_input_and_output_arguments()
        + gather_loopy_source_arguments(self.kernels))

    domain = (
        "{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources "
        "and 0<=idim<dim}")

    instructions = (
        self.get_kernel_scaling_assignments()
        + ["for itgt, isrc"]
        + [self.get_compute_a_and_b_vecs()]
        + loopy_insns
        + [
            # zip() limits the count to the shorter of the two sequences;
            # the dtype itself is not used here.
            lp.Assignment(id=None,
                          assignee="pair_result_%d" % i, expression=pair_expr,
                          temp_var_type=lp.auto)
            for i, (pair_expr, _dtype) in enumerate(
                zip(pair_exprs, self.value_dtypes))
        ]
        + ["end"]
        + self.get_result_store_instructions())

    kernel = lp.make_kernel(
        domain,
        instructions,
        arguments,
        name=self.name,
        assumptions="nsources>=1 and ntargets>=1",
        default_offset=lp.auto,
        silenced_warnings="write_race(write_lpot*)"
    )

    kernel = lp.fix_parameters(kernel, dim=self.dim)
    kernel = lp.tag_inames(kernel, "idim*:unr")

    for expn in self.expansions:
        kernel = expn.prepare_loopy_kernel(kernel)

    return lp.tag_array_axes(kernel, "center", "sep,C")
def get_kernel(self):
    """Build the loopy kernel forming expansions for listed target boxes.

    For each entry of ``target_boxes`` the generated code visits the source
    boxes listed between ``source_box_starts[itgt_box]`` and
    ``source_box_starts[itgt_box+1]`` and sum-reduces ``strength*coeff<i>``
    over ``(isrc_box, isrc)`` into row ``tgt_ibox - tgt_base_ibox`` of
    ``tgt_expansions``.

    :returns: the kernel with ``dim`` fixed, after
        ``self.expansion.prepare_loopy_kernel`` and unroll-tagging of the
        ``idim*`` inames.
    """
    ncoeffs = len(self.expansion)

    from sumpy.tools import gather_loopy_source_arguments

    arguments = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                     dim_tags="sep,c"),
        lp.GlobalArg("strengths", None, shape="nsources"),
        lp.GlobalArg("source_box_starts,source_box_lists",
                     None, shape=None, offset=lp.auto),
        lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                     None, shape=None),
        lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
        lp.GlobalArg("tgt_expansions", None,
                     shape=("ntgt_level_boxes", ncoeffs), offset=lp.auto),
        lp.ValueArg("naligned_boxes,ntgt_level_boxes,tgt_base_ibox",
                    np.int32),
        lp.ValueArg("nsources", np.int32),
        "..."
    ] + gather_loopy_source_arguments([self.expansion])

    domains = [
        "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
        "{[isrc]: isrc_start<=isrc<isrc_end}",
        "{[idim]: 0<=idim<dim}",
    ]

    loop_head = ["""
        for itgt_box
            <> tgt_ibox = target_boxes[itgt_box]

            <> center[idim] = centers[idim, tgt_ibox] {id=fetch_center}

            <> isrc_box_start = source_box_starts[itgt_box]
            <> isrc_box_stop = source_box_starts[itgt_box+1]

            for isrc_box
                <> src_ibox = source_box_lists[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                for isrc
                    <> a[idim] = center[idim] - sources[idim, isrc] \
                            {dup=idim}

                    <> strength = strengths[isrc]
        """]
    coeff_insns = self.get_loopy_instructions()
    close_source_loops = ["""
                end
            end
        """]

    # One store per expansion coefficient.
    store_template = """
            tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \
                simul_reduce(sum, (isrc_box, isrc), strength*coeff{coeffidx}) {{id_prefix=write_expn}}
        """
    stores = [
        store_template.format(coeffidx=icoeff) for icoeff in range(ncoeffs)]
    close_box_loop = ["""
        end
        """]

    kernel = lp.make_kernel(
        domains,
        (loop_head + coeff_insns + close_source_loops
         + stores + close_box_loop),
        arguments,
        name=self.name,
        assumptions="ntgt_boxes>=1",
        silenced_warnings="write_race(write_expn*)",
        default_offset=lp.auto)

    kernel = lp.fix_parameters(kernel, dim=self.dim)
    kernel = self.expansion.prepare_loopy_kernel(kernel)
    return lp.tag_inames(kernel, "idim*:unr")
def get_kernel(self):
    """Build the loopy kernel forming expansions at the listed centers.

    For each entry of ``global_qbx_centers`` the generated code finds the
    center's target box via ``qbx_center_to_target_box``, visits the source
    boxes listed for that box, and sum-reduces the expression returned by
    ``get_result_expr(i)`` over ``(isrc_box, isrc)`` into
    ``qbx_expansions[tgt_icenter, i]``.

    :returns: the kernel after ``self.expansion.prepare_loopy_kernel`` and
        unroll-tagging of the ``idim*`` inames.
    """
    ncoeffs = len(self.expansion)

    from sumpy.tools import gather_loopy_source_arguments

    arguments = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                     dim_tags="sep,c"),
        lp.GlobalArg("strengths", None, dim_tags="sep,c",
                     shape="strength_count, nsources"),
        lp.GlobalArg("qbx_center_to_target_box", None, shape=None),
        lp.GlobalArg("source_box_starts,source_box_lists",
                     None, shape=None),
        lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                     None, shape=None),
        lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                     dim_tags="sep,c"),
        lp.GlobalArg("qbx_expansion_radii", None, shape="ncenters"),
        lp.GlobalArg("qbx_expansions", None,
                     shape=("ncenters", ncoeffs)),
        lp.ValueArg("ncenters", np.int32),
        lp.ValueArg("nsources", np.int32),
        "..."
    ] + gather_loopy_source_arguments([self.expansion])

    domains = [
        "{[itgt_center]: 0<=itgt_center<ntgt_centers}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
        "{[isrc]: isrc_start<=isrc<isrc_end}",
        "{[idim]: 0<=idim<dim}",
    ]

    loop_head = ["""
        for itgt_center
            <> tgt_icenter = global_qbx_centers[itgt_center]

            <> center[idim] = qbx_centers[idim, tgt_icenter] {dup=idim}
            <> rscale = qbx_expansion_radii[tgt_icenter]

            <> itgt_box = qbx_center_to_target_box[tgt_icenter]

            <> isrc_box_start = source_box_starts[itgt_box]
            <> isrc_box_stop = source_box_starts[itgt_box+1]

            for isrc_box
                <> src_ibox = source_box_lists[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                for isrc
                    <> a[idim] = center[idim] - sources[idim, isrc] \
                            {dup=idim}
        """]
    coeff_insns = self.get_loopy_instructions()
    close_source_loops = ["""
                end
            end
        """]

    # One store per expansion coefficient.
    stores = [f"""
            qbx_expansions[tgt_icenter, {i}] = \
                    simul_reduce(sum, (isrc_box, isrc), \
                        {self.get_result_expr(i)}) \
                    {{id_prefix=write_expn}}
        """ for i in range(ncoeffs)]
    close_center_loop = ["""
        end
        """]

    kernel = lp.make_kernel(
        domains,
        (loop_head + coeff_insns + close_source_loops
         + stores + close_center_loop),
        arguments,
        name=self.name,
        assumptions="ntgt_centers>=1",
        silenced_warnings="write_race(write_expn*)",
        fixed_parameters=dict(
            dim=self.dim,
            strength_count=self.strength_count),
        lang_version=MOST_RECENT_LANGUAGE_VERSION)

    kernel = self.expansion.prepare_loopy_kernel(kernel)
    return lp.tag_inames(kernel, "idim*:unr")
def get_kernel(self):
    """Build the loopy kernel forming expansions for listed target boxes.

    For each entry of ``target_boxes`` the generated code visits the source
    boxes listed between ``source_box_starts[itgt_box]`` and
    ``source_box_starts[itgt_box+1]`` and sum-reduces the expression
    returned by ``get_result_expr(i)`` over ``(isrc_box, isrc)`` into row
    ``tgt_ibox - tgt_base_ibox`` of ``tgt_expansions``.

    :returns: the kernel after ``self.expansion.prepare_loopy_kernel`` and
        unroll-tagging of the ``idim*`` inames.
    """
    ncoeffs = len(self.expansion)

    from sumpy.tools import gather_loopy_source_arguments

    arguments = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                     dim_tags="sep,c"),
        lp.GlobalArg("strengths", None, shape="strength_count, nsources",
                     dim_tags="sep,C"),
        lp.GlobalArg("source_box_starts,source_box_lists",
                     None, shape=None, offset=lp.auto),
        lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                     None, shape=None),
        lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
        lp.GlobalArg("tgt_expansions", None,
                     shape=("ntgt_level_boxes", ncoeffs), offset=lp.auto),
        lp.ValueArg("naligned_boxes,ntgt_level_boxes,tgt_base_ibox",
                    np.int32),
        lp.ValueArg("nsources", np.int32),
        "..."
    ] + gather_loopy_source_arguments(self.kernels + (self.expansion, ))

    domains = [
        "{[itgt_box]: 0<=itgt_box<ntgt_boxes}",
        "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
        "{[isrc]: isrc_start<=isrc<isrc_end}",
        "{[idim]: 0<=idim<dim}",
    ]

    loop_head = ["""
        for itgt_box
            <> tgt_ibox = target_boxes[itgt_box]

            <> center[idim] = centers[idim, tgt_ibox] {id=fetch_center}

            <> isrc_box_start = source_box_starts[itgt_box]
            <> isrc_box_stop = source_box_starts[itgt_box+1]

            for isrc_box
                <> src_ibox = source_box_lists[isrc_box]
                <> isrc_start = box_source_starts[src_ibox]
                <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                for isrc
                    <> a[idim] = center[idim] - sources[idim, isrc] \
                            {dup=idim}
        """]
    coeff_insns = self.get_loopy_instructions()
    close_source_loops = ["""
                end
            end
        """]

    # One store per expansion coefficient.
    stores = [f"""
            tgt_expansions[tgt_ibox - tgt_base_ibox, {coeffidx}] = \
                simul_reduce(sum, (isrc_box, isrc), {self.get_result_expr(coeffidx)}) \
                    {{id_prefix=write_expn}}
        """ for coeffidx in range(ncoeffs)]
    close_box_loop = ["""
        end
        """]

    kernel = lp.make_kernel(
        domains,
        (loop_head + coeff_insns + close_source_loops
         + stores + close_box_loop),
        arguments,
        name=self.name,
        assumptions="ntgt_boxes>=1",
        silenced_warnings="write_race(write_expn*)",
        default_offset=lp.auto,
        fixed_parameters=dict(
            dim=self.dim,
            strength_count=self.strength_count),
        lang_version=MOST_RECENT_LANGUAGE_VERSION)

    kernel = self.expansion.prepare_loopy_kernel(kernel)
    return lp.tag_inames(kernel, "idim*:unr")
def get_kernel(self):
    """Build the loopy kernel summing over all source/target pairs.

    For each target the kernel stores ``knl_<k>_scaling`` times the sum over
    ``isrc`` of ``pair_result_<k>`` into ``result[k, itgt]``. With
    ``self.exclude_self`` set, each pair expression is wrapped so that it
    evaluates to 0 unless ``isrc != itgt``.

    :returns: the kernel with ``dim``, ``nstrengths`` and ``nresults`` fixed,
        ``idim*`` inames unroll-tagged, every kernel's
        ``prepare_loopy_kernel`` applied, and ``strength`` tagged ``"sep,C"``.
    """
    loopy_insns, result_names = self.get_loopy_insns_and_result_names()

    from pymbolic import var

    pair_exprs = [
        var(name) * var("strength").index(
            (self.strength_usage[i], var("isrc")))
        for i, name in enumerate(result_names)]

    if self.exclude_self:
        from pymbolic.primitives import If, ComparisonOperator, Variable
        pair_exprs = [
            If(
                ComparisonOperator(Variable("isrc"), "!=", Variable("itgt")),
                pair_expr, 0)
            for pair_expr in pair_exprs]

    from sumpy.tools import gather_loopy_source_arguments

    domain = "{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources \
            and 0<=idim<dim}"

    open_loops = ["""
        for itgt
            for isrc
        """]
    distance = ["""
                <> d[idim] = targets[idim,itgt] - sources[idim,isrc] \
        """]
    pair_assignments = [
        lp.Assignment(id=None,
                      assignee="pair_result_%d" % i, expression=pair_expr,
                      temp_var_type=lp.auto)
        for i, pair_expr in enumerate(pair_exprs)]

    store_template = """
            result[KNLIDX, itgt] = knl_KNLIDX_scaling \
                * simul_reduce(sum, isrc, pair_result_KNLIDX)
        """
    stores = [
        store_template.replace("KNLIDX", str(iknl))
        for iknl in range(len(pair_exprs))]

    instructions = (
        self.get_kernel_scaling_assignments()
        + open_loops
        + loopy_insns
        + distance
        + pair_assignments
        + ["""
            end
        """]
        + stores
        + ["""
        end
        """])

    kernel_args = [
        lp.GlobalArg("sources", None, shape=(self.dim, "nsources")),
        lp.GlobalArg("targets", None, shape=(self.dim, "ntargets")),
        lp.ValueArg("nsources", None),
        lp.ValueArg("ntargets", None),
        lp.GlobalArg("strength", None, shape="nstrengths,nsources"),
        lp.GlobalArg("result", None,
                     shape="nresults,ntargets", dim_tags="sep,C")
    ] + gather_loopy_source_arguments(self.kernels)

    kernel = lp.make_kernel(
        domain,
        instructions,
        kernel_args,
        name=self.name,
        assumptions="nsources>=1 and ntargets>=1")

    # Sizes known at build time are baked in after kernel creation.
    kernel = lp.fix_parameters(
        kernel,
        dim=self.dim,
        nstrengths=self.strength_count,
        nresults=len(self.kernels))

    kernel = lp.tag_inames(kernel, "idim*:unr")

    for knl in self.kernels:
        kernel = knl.prepare_loopy_kernel(kernel)

    return lp.tag_array_axes(kernel, "strength", "sep,C")