def get_kernels(self, key_dtype, value_dtype, starts_dtype):
    """Build the device kernels used to sort values by key and find groups.

    :arg key_dtype: dtype of the ``keys`` array (also used for the
        ``nkeys`` scalar of the scan kernel).
    :arg value_dtype: dtype of the ``values`` array.
    :arg starts_dtype: dtype of the group-start array.
    :returns: a ``_KernelInfo`` with three members:

        * ``by_target_sorter``: a ``RadixSort`` over ``(values, keys)``
          that sorts on ``keys[i]``.
        * ``start_finder``: an elementwise kernel that, for every index
          *i* at which the sorted key differs from its predecessor (or
          ``i == 0``), stores *i* in ``key_group_starts[key]``.
        * ``bound_propagation_scan``: a ``min`` scan that reads and writes
          ``starts[nkeys-i]``, i.e. walks ``starts`` back to front.
    """
    from pyopencl.algorithm import RadixSort
    from pyopencl.tools import VectorArg, ScalarArg

    by_target_sorter = RadixSort(
        self.context, [
            VectorArg(value_dtype, "values"),
            VectorArg(key_dtype, "keys"),
        ],
        key_expr="keys[i]",
        sort_arg_names=["values", "keys"])

    from pyopencl.elementwise import ElementwiseTemplate
    start_finder = ElementwiseTemplate(
        arguments="""//CL//
        starts_t *key_group_starts,
        key_t *keys_sorted_by_key,
        """,

        operation=r"""//CL//
        key_t my_key = keys_sorted_by_key[i];

        if (i == 0 || my_key != keys_sorted_by_key[i-1])
            key_group_starts[my_key] = i;
        """,
        name="find_starts").build(
            self.context,
            type_aliases=(
                # keys_sorted_by_key holds keys, so key_t has to follow
                # key_dtype; aliasing it to starts_dtype misreads the
                # buffer whenever the two dtypes differ.
                ("key_t", key_dtype),
                ("starts_t", starts_dtype),
            ),
            var_values=())

    from pyopencl.scan import GenericScanKernel
    bound_propagation_scan = GenericScanKernel(
        self.context, starts_dtype,
        arguments=[
            VectorArg(starts_dtype, "starts"),
            # starts has length n+1
            ScalarArg(key_dtype, "nkeys"),
        ],
        input_expr="starts[nkeys-i]",
        scan_expr="min(a, b)",
        # Largest representable value is the identity element of min().
        neutral=_make_cl_int_literal(
            np.iinfo(starts_dtype).max, starts_dtype),
        output_statement="starts[nkeys-i] = item;")

    return _KernelInfo(
        by_target_sorter=by_target_sorter,
        start_finder=start_finder,
        bound_propagation_scan=bound_propagation_scan)
def __init__(self, extra_args, ball_center_and_radius_expr,
             leaf_found_op, preamble="", name="area_query_elwise"):
    """Set up ``self.elwise_template`` for an area-query kernel.

    The fixed tree and peer-list argument declarations are followed by
    *extra_args*.  *ball_center_and_radius_expr* and *leaf_found_op* are
    code snippets that get wrapped into the Mako defs
    ``get_ball_center_and_radius(ball_center, ball_radius, i)`` and
    ``leaf_found_op(leaf_box_id, ball_center, ball_radius)``; these are
    placed ahead of the traversal defs, the guiding-box finder macro and
    the walker body.  *preamble* and *name* are handed to
    ``ElementwiseTemplate`` unchanged.
    """
    from boxtree.traversal import TRAVERSAL_PREAMBLE_MAKO_DEFS

    def as_mako_def(signature, body):
        # Emit a Mako <%def> so the snippet can be invoked by name from
        # the rest of the template.
        return """
        <%def name=\"{decl}\">
        {expr}
        </%def>
        """.format(decl=signature, expr=body)

    argument_decls = r"""//CL:mako//
        coord_t *box_centers,
        coord_t root_extent,
        box_level_t *box_levels,
        box_id_t aligned_nboxes,
        box_id_t *box_child_ids,
        box_flags_t *box_flags,
        peer_list_idx_t *peer_list_starts,
        box_id_t *peer_lists,
        %for ax in AXIS_NAMES[:dimensions]:
            coord_t bbox_min_${ax},
        %endfor
        """ + extra_args

    ball_def = as_mako_def(
        "get_ball_center_and_radius(ball_center, ball_radius, i)",
        ball_center_and_radius_expr)
    leaf_def = as_mako_def(
        "leaf_found_op(leaf_box_id, ball_center, ball_radius)",
        leaf_found_op)

    kernel_body = "".join([
        "//CL:mako//\n",
        ball_def,
        leaf_def,
        TRAVERSAL_PREAMBLE_MAKO_DEFS,
        GUIDING_BOX_FINDER_MACRO,
        AREA_QUERY_WALKER_BODY,
    ])

    self.elwise_template = ElementwiseTemplate(
        arguments=argument_decls,
        operation=kernel_body,
        name=name,
        preamble=preamble)
TRANSLATION_CLASS_FINDER_TEMPLATE = ElementwiseTemplate( arguments=r"""//CL:mako// /* input: */ box_id_t *from_sep_siblings_lists, box_id_t *from_sep_siblings_starts, box_id_t *target_or_target_parent_boxes, int ntarget_or_target_parent_boxes, coord_t *box_centers, int aligned_nboxes, coord_t root_extent, box_level_t *box_levels, int well_sep_is_n_away, /* output: */ int *translation_classes, int *translation_class_is_used, int *error_flag, """, operation=TRAVERSAL_PREAMBLE_MAKO_DEFS + r"""//CL:mako// // Find the target box for this source box. box_id_t source_box_id = from_sep_siblings_lists[i]; size_t itarget_box = bsearch( from_sep_siblings_starts, 1 + ntarget_or_target_parent_boxes, i); box_id_t target_box_id = target_or_target_parent_boxes[itarget_box]; // Ensure levels are the same. if (box_levels[source_box_id] != box_levels[target_box_id]) { atomic_or(error_flag, 1); PYOPENCL_ELWISE_CONTINUE; } // Compute the translation vector and translation class. ${load_center("source_center", "source_box_id")} ${load_center("target_center", "target_box_id")} int_coord_vec_t vec = get_normalized_translation_vector( root_extent, box_levels[source_box_id], source_center, target_center); int translation_class = get_translation_class(vec, well_sep_is_n_away); // Ensure valid translation class. if (translation_class == -1) { atomic_or(error_flag, 1); PYOPENCL_ELWISE_CONTINUE; } % if translation_class_per_level: translation_class += box_levels[source_box_id] * \ ${ntranslation_classes_per_level}; % endif translation_classes[i] = translation_class; atomic_or(&translation_class_is_used[translation_class], 1); """)
return result, evt # }}} # {{{ map values through table from pyopencl.elementwise import ElementwiseTemplate MAP_VALUES_TPL = ElementwiseTemplate( arguments="""//CL// dst_value_t *dst, src_value_t *src, dst_value_t *map_values """, operation=r"""//CL// dst[i] = map_values[src[i]]; """, name="map_values") class MapValuesKernel(object): def __init__(self, context): self.context = context @memoize_method def _get_kernel(self, dst_dtype, src_dtype): type_aliases = ( ("src_value_t", src_dtype),
class AreaQueryElementwiseTemplate(object):
    """
    Experimental: Intended as a way to perform operations in the body of an
    area query.
    """

    @staticmethod
    def unwrap_args(tree, peer_lists, *args):
        """Flatten *tree*, *peer_lists* and *args* into one argument tuple.

        The order matches the argument declarations of the kernel template:
        the tree arrays and scalars, the peer list arrays, the entries of
        ``tree.bounding_box[0]``, and finally *args*.
        """
        tree_part = (
            tree.box_centers,
            tree.root_extent,
            tree.box_levels,
            tree.aligned_nboxes,
            tree.box_child_ids,
            tree.box_flags,
        )
        peer_part = (peer_lists.peer_list_starts, peer_lists.peer_lists)
        bbox_min_part = tuple(tree.bounding_box[0])
        return tree_part + peer_part + bbox_min_part + args

    def __init__(self, extra_args, ball_center_and_radius_expr,
                 leaf_found_op, preamble="", name="area_query_elwise"):
        """Create ``self.elwise_template`` from the caller's code snippets.

        *extra_args* is appended to the fixed argument declarations.
        *ball_center_and_radius_expr* and *leaf_found_op* become the bodies
        of the Mako defs ``get_ball_center_and_radius`` and
        ``leaf_found_op``.  *preamble* and *name* are passed through to
        ``ElementwiseTemplate``.
        """
        from boxtree.traversal import TRAVERSAL_PREAMBLE_MAKO_DEFS

        macro_skeleton = """
            <%def name=\"{decl}\">
            {expr}
            </%def>
            """

        def wrap_in_macro(decl, expr):
            return macro_skeleton.format(decl=decl, expr=expr)

        fixed_and_extra_arguments = r"""//CL:mako//
            coord_t *box_centers,
            coord_t root_extent,
            box_level_t *box_levels,
            box_id_t aligned_nboxes,
            box_id_t *box_child_ids,
            box_flags_t *box_flags,
            peer_list_idx_t *peer_list_starts,
            box_id_t *peer_lists,
            %for ax in AXIS_NAMES[:dimensions]:
                coord_t bbox_min_${ax},
            %endfor
            """ + extra_args

        # User-supplied defs come first, then the shared traversal pieces.
        operation_source = "//CL:mako//\n"
        operation_source += wrap_in_macro(
            "get_ball_center_and_radius(ball_center, ball_radius, i)",
            ball_center_and_radius_expr)
        operation_source += wrap_in_macro(
            "leaf_found_op(leaf_box_id, ball_center, ball_radius)",
            leaf_found_op)
        operation_source += TRAVERSAL_PREAMBLE_MAKO_DEFS
        operation_source += GUIDING_BOX_FINDER_MACRO
        operation_source += AREA_QUERY_WALKER_BODY

        self.elwise_template = ElementwiseTemplate(
            arguments=fixed_and_extra_arguments,
            operation=operation_source,
            name=name,
            preamble=preamble)

    def generate(self, context,
                 dimensions, coord_dtype, box_id_dtype,
                 peer_list_idx_dtype, max_levels,
                 extra_var_values=(), extra_type_aliases=(),
                 extra_preamble=""):
        """Build the kernel for *context* with the given dimensions and dtypes.

        *extra_var_values* and *extra_type_aliases* are appended to the
        template variables and type aliases assembled here;
        *extra_preamble* is appended to the rendered preamble.  Returns
        whatever ``self.elwise_template.build`` returns.
        """
        from pyopencl.tools import dtype_to_ctype
        from boxtree import box_flags_enum
        from boxtree.traversal import TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES
        from boxtree.tree_build import TreeBuilder

        # Kept as a tuple of pairs: it is both turned into a dict for
        # rendering the preamble and concatenated with extra_var_values.
        render_vars = (
            ("np", np),
            ("dimensions", dimensions),
            ("dtype_to_ctype", dtype_to_ctype),
            ("box_id_dtype", box_id_dtype),
            ("particle_id_dtype", None),
            ("coord_dtype", coord_dtype),
            ("vec_types", tuple(cl.cltypes.vec_types.items())),
            ("max_levels", max_levels),
            ("AXIS_NAMES", AXIS_NAMES),
            ("box_flags_enum", box_flags_enum),
            ("peer_list_idx_dtype", peer_list_idx_dtype),
            ("debug", False),
            ("root_extent_stretch_factor",
                TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR),
        )

        # HACK: box_flags_t and coord_t are defined here and in the
        # template below, so typedef redefinition warnings are disabled
        # around the shared typedefs.
        preamble_source = (
            """
            #pragma clang diagnostic push
            #pragma clang diagnostic ignored "-Wtypedef-redefinition"
            """
            + TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES
            + """
            #pragma clang diagnostic pop
            """)
        preamble = Template(
            preamble_source, strict_undefined=True
        ).render(**dict(render_vars))

        return self.elwise_template.build(
            context,
            type_aliases=(
                ("coord_t", coord_dtype),
                ("box_id_t", box_id_dtype),
                ("peer_list_idx_t", peer_list_idx_dtype),
                ("box_level_t", np.uint8),
                ("box_flags_t", box_flags_enum.dtype),
            ) + extra_type_aliases,
            var_values=render_vars + extra_var_values,
            more_preamble=preamble + extra_preamble)
${walk_advance()} } } """ from pyopencl.elementwise import ElementwiseTemplate from boxtree.tools import InlineBinarySearch STARTS_EXPANDER_TEMPLATE = ElementwiseTemplate( arguments=r""" idx_t *dst, idx_t *starts, idx_t starts_len """, operation=r"""//CL// /* Find my index in starts, place the index in dst. */ dst[i] = bsearch(starts, starts_len, i); """, name="starts_expander", preamble=str(InlineBinarySearch("idx_t"))) # }}} # {{{ area query elementwise template class AreaQueryElementwiseTemplate(object): """ Experimental: Intended as a way to perform operations in the body of an area query.
class AreaQueryElementwiseTemplate(object):
    """
    Experimental: Intended as a way to perform operations in the body of an
    area query.
    """

    @staticmethod
    def unwrap_args(tree, peer_lists, *args):
        """Return the kernel arguments as one flat tuple.

        The order follows the template's argument declarations: the tree
        arrays and scalars, the two peer list arrays, the entries of
        ``tree.bounding_box[0]``, then *args*.
        """
        return (tree.box_centers,
                tree.root_extent,
                tree.box_levels,
                tree.aligned_nboxes,
                tree.box_child_ids,
                tree.box_flags,
                peer_lists.peer_list_starts,
                peer_lists.peer_lists) + tuple(tree.bounding_box[0]) + args

    def __init__(self, extra_args, ball_center_and_radius_expr,
                 leaf_found_op, preamble="", name="area_query_elwise"):
        """Create ``self.elwise_template``.

        *extra_args* is appended to the fixed argument declarations.
        *ball_center_and_radius_expr* and *leaf_found_op* are wrapped into
        the Mako defs ``get_ball_center_and_radius`` and ``leaf_found_op``
        that precede the shared traversal code.  *preamble* and *name* are
        passed to ``ElementwiseTemplate`` unchanged.
        """

        def wrap_in_macro(decl, expr):
            return """
            <%def name=\"{decl}\">
            {expr}
            </%def>
            """.format(decl=decl, expr=expr)

        from boxtree.traversal import TRAVERSAL_PREAMBLE_MAKO_DEFS

        self.elwise_template = ElementwiseTemplate(
            arguments=r"""//CL:mako//
            coord_t *box_centers,
            coord_t root_extent,
            box_level_t *box_levels,
            box_id_t aligned_nboxes,
            box_id_t *box_child_ids,
            box_flags_t *box_flags,
            peer_list_idx_t *peer_list_starts,
            box_id_t *peer_lists,
            %for ax in AXIS_NAMES[:dimensions]:
                coord_t bbox_min_${ax},
            %endfor
            """ + extra_args,
            operation="//CL:mako//\n"
            + wrap_in_macro(
                "get_ball_center_and_radius(ball_center, ball_radius, i)",
                ball_center_and_radius_expr)
            + wrap_in_macro(
                "leaf_found_op(leaf_box_id, ball_center, ball_radius)",
                leaf_found_op)
            + TRAVERSAL_PREAMBLE_MAKO_DEFS
            + GUIDING_BOX_FINDER_MACRO
            + AREA_QUERY_WALKER_BODY,
            name=name,
            preamble=preamble)

    def generate(self, context,
                 dimensions, coord_dtype, box_id_dtype,
                 peer_list_idx_dtype, max_levels,
                 extra_var_values=(), extra_type_aliases=(),
                 extra_preamble=""):
        """Build the kernel for *context* with the given dimensions and dtypes.

        *extra_var_values* and *extra_type_aliases* are appended to the
        template variables and type aliases assembled here;
        *extra_preamble* is appended to the rendered preamble.  Returns
        whatever ``self.elwise_template.build`` returns.
        """
        from pyopencl.tools import dtype_to_ctype
        # pyopencl.array.vec is a deprecated alias whose ``types`` attribute
        # forwards to pyopencl.cltypes.vec_types; use the real location.
        from pyopencl import cltypes
        from boxtree import box_flags_enum
        from boxtree.traversal import TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES
        from boxtree.tree_build import TreeBuilder

        render_vars = (
            ("dimensions", dimensions),
            ("dtype_to_ctype", dtype_to_ctype),
            ("box_id_dtype", box_id_dtype),
            ("particle_id_dtype", None),
            ("coord_dtype", coord_dtype),
            ("vec_types", tuple(cltypes.vec_types.items())),
            ("max_levels", max_levels),
            ("AXIS_NAMES", AXIS_NAMES),
            ("box_flags_enum", box_flags_enum),
            ("peer_list_idx_dtype", peer_list_idx_dtype),
            ("debug", False),
            ("root_extent_stretch_factor",
                TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR),

            # Not used (but required by TRAVERSAL_PREAMBLE_TEMPLATE)
            ("stick_out_factor", 0),
        )

        preamble = Template(
            # HACK: box_flags_t and coord_t are defined here and
            # in the template below, so disable typedef redefinition
            # warnings.
            """
            #pragma clang diagnostic push
            #pragma clang diagnostic ignored "-Wtypedef-redefinition"
            """
            + TRAVERSAL_PREAMBLE_TYPEDEFS_AND_DEFINES
            + """
            #pragma clang diagnostic pop
            """,
            strict_undefined=True).render(**dict(render_vars))

        return self.elwise_template.build(
            context,
            type_aliases=(
                ("coord_t", coord_dtype),
                ("box_id_t", box_id_dtype),
                ("peer_list_idx_t", peer_list_idx_dtype),
                ("box_level_t", np.uint8),
                ("box_flags_t", box_flags_enum.dtype),
            ) + extra_type_aliases,
            var_values=render_vars + extra_var_values,
            more_preamble=preamble + extra_preamble)
def get_new_nb_sources_knl(write_counts):
    """Build the kernel that merges three per-target-box lists into one.

    With *write_counts* true, the kernel stores, for each target box, the
    summed length of its neighbor-source, sep-close-smaller and
    sep-close-bigger lists in ``new_neighbor_source_boxes_counts`` at
    index ``itgt_box + 1`` (and a zero at index 0).  With it false, the
    kernel copies the three lists back to back into
    ``new_neighbor_source_boxes_lists``, beginning at
    ``new_neighbor_source_boxes_starts[itgt_box]``.

    ``queue`` and ``self`` come from the enclosing scope.
    """
    from pyopencl.elementwise import ElementwiseTemplate

    kernel_arguments = """//CL:mako//
        /* input: */
        box_id_t *target_or_target_parent_boxes_from_tgt_boxes,
        box_id_t *neighbor_source_boxes_starts,
        box_id_t *sep_close_smaller_starts,
        box_id_t *sep_close_bigger_starts,

        %if not write_counts:
            box_id_t *neighbor_source_boxes_lists,
            box_id_t *sep_close_smaller_lists,
            box_id_t *sep_close_bigger_lists,

            box_id_t *new_neighbor_source_boxes_starts,
        %endif

        /* output: */

        %if write_counts:
            box_id_t *new_neighbor_source_boxes_counts,
        %else:
            box_id_t *new_neighbor_source_boxes_lists,
        %endif
        """

    # The sep-close-bigger list is indexed by target-or-target-parent box
    # number, the other two by target box number.
    kernel_body = """//CL:mako//
        box_id_t itgt_box = i;
        box_id_t itarget_or_target_parent_box =
            target_or_target_parent_boxes_from_tgt_boxes[itgt_box];

        box_id_t neighbor_source_boxes_start =
            neighbor_source_boxes_starts[itgt_box];
        box_id_t neighbor_source_boxes_count =
            neighbor_source_boxes_starts[itgt_box + 1]
            - neighbor_source_boxes_start;

        box_id_t sep_close_smaller_start =
            sep_close_smaller_starts[itgt_box];
        box_id_t sep_close_smaller_count =
            sep_close_smaller_starts[itgt_box + 1]
            - sep_close_smaller_start;

        box_id_t sep_close_bigger_start =
            sep_close_bigger_starts[itarget_or_target_parent_box];
        box_id_t sep_close_bigger_count =
            sep_close_bigger_starts[itarget_or_target_parent_box + 1]
            - sep_close_bigger_start;

        %if write_counts:
            if (itgt_box == 0)
                new_neighbor_source_boxes_counts[0] = 0;

            new_neighbor_source_boxes_counts[itgt_box + 1] =
                neighbor_source_boxes_count
                + sep_close_smaller_count
                + sep_close_bigger_count
                ;
        %else:

            box_id_t cur_idx = new_neighbor_source_boxes_starts[itgt_box];

            #define COPY_FROM(NAME) \
                for (box_id_t i = 0; i < NAME##_count; ++i) \
                    new_neighbor_source_boxes_lists[cur_idx++] = \
                        NAME##_lists[NAME##_start+i];

            COPY_FROM(neighbor_source_boxes)
            COPY_FROM(sep_close_smaller)
            COPY_FROM(sep_close_bigger)

        %endif
        """

    merge_template = ElementwiseTemplate(kernel_arguments, kernel_body)
    return merge_template.build(
        queue.context,
        type_aliases=(
            ("box_id_t", self.tree.box_id_dtype),
        ),
        var_values=(
            ("write_counts", write_counts),
        ))
# }}} # {{{ level start box nrs LEVEL_START_BOX_NR_EXTRACTOR_TEMPLATE = ElementwiseTemplate( arguments="""//CL// box_id_t *level_start_box_nrs, box_level_t *box_levels, box_id_t *box_list, box_id_t *list_level_start_box_nrs, """, operation=r"""//CL// // Kernel is ranged so that this is true: // assert(i > 0); box_id_t my_box_id = box_list[i]; box_id_t prev_box_id = box_list[i-1]; int my_level = box_levels[my_box_id]; box_id_t my_level_start = level_start_box_nrs[my_level]; if (prev_box_id < my_level_start && my_level_start <= my_box_id) list_level_start_box_nrs[my_level] = i; """, name="extract_level_start_box_nrs") # }}} # {{{ colleagues