def _add_prop_or_const(self, name, carray):
    """Add a new property or constant given the name and carray.

    Note that this assumes that this property is already added to the
    particle array.
    """
    np_array = self._get_array(carray)
    g_ary = Array(np_array.dtype, n=carray.length, backend=self.backend)
    g_ary.set(np_array)
    self._data[name] = g_ary
    setattr(self, name, g_ary)
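# A minimal sketch (not part of the original code) of the host-to-device
# mirroring pattern used by _add_prop_or_const above, assuming ``Array`` is
# compyle.array.Array and the 'cython' backend is available. The helper name
# ``mirror_to_device`` is hypothetical.
import numpy as np
from compyle.array import Array

def mirror_to_device(np_array, name, store, backend='cython'):
    # Allocate a device array of matching dtype/length, copy the host
    # data over, and register it under the property name.
    g_ary = Array(np_array.dtype, n=np_array.size, backend=backend)
    g_ary.set(np_array)
    store[name] = g_ary
    return g_ary

# Usage:
# props = {}
# mirror_to_device(np.linspace(0.0, 1.0, 10), 'x', props)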
def append_parray(self, parray, align=True, update_constants=False):
    """Add particles from a particle array.

    Properties that are not there in self will be added.
    """
    if parray.gpu is None:
        parray.set_device_helper(DeviceHelper(parray))
    if parray.gpu.get_number_of_particles() == 0:
        return
    num_extra_particles = parray.gpu.get_number_of_particles()
    old_num_particles = self.get_number_of_particles()
    new_num_particles = num_extra_particles + old_num_particles

    # Extend current arrays by the required number of particles.
    self.extend(num_extra_particles)

    my_stride = self._particle_array.stride
    for prop_name in parray.gpu.properties:
        stride = parray.stride.get(prop_name, 1)
        if stride > 1 and prop_name not in my_stride:
            my_stride[prop_name] = stride
        if prop_name in self.properties:
            arr = self._data[prop_name]
            source = parray.gpu.get_device_array(prop_name)
            arr.dev[old_num_particles * stride:] = source.dev
        else:
            # This property is not there in self.
            dtype = parray.gpu.get_device_array(prop_name).dtype
            arr = Array(dtype, n=new_num_particles * stride,
                        backend=self.backend)
            arr.fill(parray.default_values[prop_name])
            self.update_prop(prop_name, arr)

            # Now add the values to the end of the created array.
            dest = self._data[prop_name]
            source = parray.gpu.get_device_array(prop_name)
            dest.dev[old_num_particles * stride:] = source.dev

    if update_constants:
        for const in parray.gpu.constants:
            if const not in self.constants:
                arr = parray.gpu.get_device_array(const)
                self.update_const(const, arr.copy())

    if num_extra_particles > 0 and align:
        self.align_particles()
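# A CPU reference sketch (not part of the original code) of the stride-aware
# append above. A property with stride s stores s values per particle, so
# the appended particles' values occupy the last m * s slots of the extended
# array -- which is why the destination slice starts at
# old_num_particles * stride.
import numpy as np

def append_strided(dest, source, stride=1):
    # ``dest`` holds old_n * stride values; ``source`` holds m * stride.
    out = np.empty(dest.size + source.size, dtype=dest.dtype)
    out[:dest.size] = dest
    out[dest.size:] = source  # i.e. out[old_n * stride:] = source
    return out

# Two particles with a stride-3 property, appending one more particle:
# append_strided(np.arange(6.0), np.array([9.0, 9.0, 9.0]), stride=3)
# -> [0., 1., 2., 3., 4., 5., 9., 9., 9.]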
def _remove_particles_bool(self, if_remove, align=True):
    """Remove particle i if if_remove[i] is True."""
    num_indices = int(array.sum(if_remove, backend=self.backend))
    if num_indices == 0:
        return

    num_particles = self.get_number_of_particles()
    new_indices = Array(np.uint32, n=(num_particles - num_indices),
                        backend=self.backend)
    num_removed_particles = array.empty(1, dtype=np.int32,
                                        backend=self.backend)

    remove_knl, stride_knl = self._get_remove_particles_bool_kernels()

    remove_knl(if_remove=if_remove, new_indices=new_indices,
               num_removed_particles=num_removed_particles,
               num_particles=num_particles)

    new_num_particles = num_particles - int(num_removed_particles.get())

    strides = set(self._particle_array.stride.values())
    s_indices = {1: new_indices}
    for stride in strides:
        if stride == 1:
            continue
        size = new_num_particles * stride
        s_index = Array(np.uint32, n=size, backend=self.backend)
        stride_knl(new_indices, s_index, size, stride)
        s_indices[stride] = s_index

    for prop in self.properties:
        stride = self._particle_array.stride.get(prop, 1)
        s_index = s_indices[stride]
        self._data[prop].align(s_index)
        setattr(self, prop, self._data[prop])

    if align:
        self.align_particles()
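# A CPU reference sketch (not part of the original code) of what the two
# kernels above compute. ``remove_knl`` is effectively a stream compaction:
# it gathers the indices of the surviving particles. ``stride_knl`` then
# expands each surviving particle index i into the slots
# i * s .. i * s + s - 1 used by properties with stride s.
import numpy as np

def survivors(if_remove):
    # Indices of particles to keep, in order.
    return np.flatnonzero(np.asarray(if_remove) == 0).astype(np.uint32)

def expand_strided(new_indices, stride):
    # Slot j of survivor i lives at new_indices[i] * stride + j.
    return (new_indices[:, None] * stride
            + np.arange(stride, dtype=np.uint32)).ravel()

# survivors([0, 1, 0, 0]) -> [0, 2, 3]
# expand_strided(np.array([0, 2, 3], np.uint32), 2) -> [0, 1, 4, 5, 6, 7]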
def find_neighbor_cids(self, tree_src):
    neighbor_cid_count = Array(np.uint32, n=self.unique_cid_count + 1,
                               backend='opencl')
    find_neighbor_cid_counts = self._leaf_neighbor_operation(
        tree_src,
        args="uint2 *pbounds, int *cnt",
        setup="int count=0",
        operation="""
        if (pbounds[cid_src].s0 < pbounds[cid_src].s1)
            count++;
        """,
        output_expr="cnt[i] = count;"
    )
    find_neighbor_cid_counts = profile_kernel(
        find_neighbor_cid_counts, 'find_neighbor_cid_count',
        backend='opencl'
    )
    find_neighbor_cid_counts(tree_src.pbounds.dev,
                             neighbor_cid_count.dev)

    neighbor_psum = _get_neighbor_count_prefix_sum_kernel(self.ctx)
    neighbor_psum(neighbor_cid_count.dev)

    total_neighbors = int(neighbor_cid_count.dev[-1].get())
    neighbor_cids = Array(np.uint32, n=total_neighbors,
                          backend='opencl')

    find_neighbor_cids = self._leaf_neighbor_operation(
        tree_src,
        args="uint2 *pbounds, int *cnt, int *neighbor_cids",
        setup="int offset=cnt[i];",
        operation="""
        if (pbounds[cid_src].s0 < pbounds[cid_src].s1)
            neighbor_cids[offset++] = cid_src;
        """,
        output_expr=""
    )
    find_neighbor_cids = profile_kernel(
        find_neighbor_cids, 'find_neighbor_cids', backend='opencl')
    find_neighbor_cids(tree_src.pbounds.dev, neighbor_cid_count.dev,
                       neighbor_cids.dev)
    return neighbor_cid_count, neighbor_cids
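# A CPU reference sketch (not part of the original code) of the two-pass
# pattern above: pass 1 counts neighbors per destination leaf, a prefix sum
# turns the counts into write offsets, and pass 2 writes each leaf's
# neighbor cids starting at its offset. The last entry of the offset array
# is the total, which sizes ``neighbor_cids``.
import numpy as np

def two_pass_fill(neighbors_per_leaf):
    n_leaves = len(neighbors_per_leaf)
    cnt = np.zeros(n_leaves + 1, dtype=np.int64)
    for i, nbrs in enumerate(neighbors_per_leaf):        # pass 1: count
        cnt[i] = len(nbrs)
    cnt[:] = np.concatenate(([0], np.cumsum(cnt[:-1])))  # exclusive scan
    out = np.empty(cnt[-1], dtype=np.uint32)
    for i, nbrs in enumerate(neighbors_per_leaf):        # pass 2: fill
        out[cnt[i]:cnt[i] + len(nbrs)] = nbrs
    return cnt, out

# two_pass_fill([[3, 7], [7], []]) -> (array([0, 2, 3, 3]), array([3, 7, 7]))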
def remove_particles(self, indices, align=True):
    """Remove particles whose indices are given in index_list.

    Parameters
    ----------

    indices : array
        an array of indices, this array can be a list, numpy array or a
        LongArray.
    """
    if len(indices) > self.get_number_of_particles():
        msg = 'Number of particles to be removed is greater than '
        msg += 'number of particles in array'
        raise ValueError(msg)

    num_particles = self.get_number_of_particles()
    if_remove = Array(np.int32, n=num_particles, backend=self.backend)
    if_remove.fill(0)

    fill_if_remove_knl = self._get_remove_particles_kernel()
    fill_if_remove_knl(indices, if_remove, num_particles)

    self._remove_particles_bool(if_remove, align=align)
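# What ``fill_if_remove_knl`` computes, as a CPU sketch (not part of the
# original code): scatter ones into a zeroed flag array at the given
# indices, then hand the flags to the boolean removal path above.
import numpy as np

def build_if_remove(indices, num_particles):
    if_remove = np.zeros(num_particles, dtype=np.int32)
    if_remove[np.asarray(indices)] = 1
    return if_remove

# build_if_remove([1, 3], 5) -> [0, 1, 0, 1, 0]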
def _create_ghosts_periodic(self):
    """Identify boundary particles and create images.

    We need to find all particles that are within a specified distance
    from the boundaries and place image copies on the other side of the
    boundary. Corner reflections need to be accounted for when using
    domains with multiple periodicity.

    The periodic domain is specified using the DomainManager object.
    """
    copy_props = self.copy_props
    pa_wrappers = self.pa_wrappers
    narrays = self.narrays

    # Cell size used to check for periodic ghosts. For summation density
    # like operations, we need to create two layers of ghost images, this
    # is configurable via the n_layers argument to the constructor.
    cell_size = self.n_layers * self.cell_size

    # Periodic domain values.
    xmin, xmax = self.xmin, self.xmax
    ymin, ymax = self.ymin, self.ymax
    zmin, zmax = self.zmin, self.zmax

    xtranslate = self.xtranslate
    ytranslate = self.ytranslate
    ztranslate = self.ztranslate

    # Periodicity flags.
    periodic_in_x = self.periodic_in_x
    periodic_in_y = self.periodic_in_y
    periodic_in_z = self.periodic_in_z

    reduce_knl = self._get_ghosts_reduction_kernel()
    scan_knl = self._get_ghosts_scan_kernel()
    translate_knl = self._get_translate_kernel()

    if not self.ghosts:
        self.ghosts = [
            paw.pa.empty_clone(props=copy_props[i])
            for i, paw in enumerate(pa_wrappers)
        ]
    else:
        for ghost_pa in self.ghosts:
            ghost_pa.resize(0)
        for i in range(narrays):
            self.ghosts[i].ensure_properties(
                pa_wrappers[i].pa, props=copy_props[i])

    for i, pa_wrapper in enumerate(self.pa_wrappers):
        ghost_pa = self.ghosts[i]
        x = pa_wrapper.pa.gpu.x
        y = pa_wrapper.pa.gpu.y
        z = pa_wrapper.pa.gpu.z

        num_extra_particles = reduce_knl(
            x, y, z, xmin, ymin, zmin, xmax, ymax, zmax, cell_size,
            periodic_in_x, periodic_in_y, periodic_in_z)
        num_extra_particles = int(num_extra_particles)

        indices = Array(np.int32, n=num_extra_particles)
        masks = Array(np.int32, n=num_extra_particles)

        scan_knl(periodic_in_x=periodic_in_x,
                 periodic_in_y=periodic_in_y,
                 periodic_in_z=periodic_in_z,
                 x=x, y=y, z=z,
                 xmin=xmin, ymin=ymin, zmin=zmin,
                 xmax=xmax, ymax=ymax, zmax=zmax,
                 cell_size=cell_size, masks=masks, indices=indices)

        pa_wrapper.pa.extract_particles(
            indices, ghost_pa, align=False, props=copy_props[i])

        translate_knl(ghost_pa.gpu.x, ghost_pa.gpu.y, ghost_pa.gpu.z,
                      ghost_pa.gpu.tag, xtranslate, ytranslate,
                      ztranslate, masks)

        pa_wrapper.pa.append_parray(ghost_pa, align=False)
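# A CPU sketch (not part of the original code) of periodic ghost creation in
# one dimension: particles within ``cell_size`` of either boundary get an
# image translated by +/- the domain length, so neighbor searches near the
# boundary see a periodic domain.
import numpy as np

def periodic_ghosts_1d(x, xmin, xmax, cell_size):
    xtranslate = xmax - xmin
    near_low = x < xmin + cell_size    # image appears past xmax
    near_high = x > xmax - cell_size   # image appears before xmin
    return np.concatenate([x[near_low] + xtranslate,
                           x[near_high] - xtranslate])

# periodic_ghosts_1d(np.array([0.05, 0.5, 0.95]), 0.0, 1.0, 0.1)
# -> images at 1.05 and -0.05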
def _allocate_memory(self, pa_gpu):
    shape = getattr(pa_gpu, self.varnames[0]).dev.shape[0]
    for v in self.varnames:
        setattr(
            self, v,
            Array(ctype_to_dtype(self.c_type), n=shape,
                  backend='opencl'))
class Tree(object):
    """k-ary tree.
    """
    def __init__(self, n, k=8, leaf_size=32):
        self.ctx = get_context()
        self.queue = get_queue()
        self.sorted = False
        self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

        self.initialized = False
        self.preamble = ""

        self.leaf_size = leaf_size
        self.k = k
        self.n = n
        self.depth = 0

        self.index_function_args = []
        self.index_function_arg_ctypes = []
        self.index_function_arg_dtypes = []
        self.index_function_consts = []
        self.index_function_const_ctypes = []
        self.index_code = ""

        self.set_index_function_info()

    def set_index_function_info(self):
        raise NotImplementedError

    def get_data_args(self):
        return [getattr(self, v) for v in self.index_function_args]

    def get_index_constants(self, depth):
        raise NotImplementedError

    def _initialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids.fill(0)

        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            setattr(self, var,
                    Array(dtype, n=num_particles, backend='opencl'))

        # Filled after the tree is built.
        self.pbounds = None
        self.offsets = None
        self.initialized = True

    def _reinitialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids.resize(num_particles)
        self.cids.resize(num_particles)
        self.cids.fill(0)

        for var in self.index_function_args:
            getattr(self, var).resize(num_particles)

        # Filled after the tree is built.
        self.pbounds = None
        self.offsets = None

    def _setup_build(self):
        if not self.initialized:
            self._initialize_data()
        else:
            self._reinitialize_data()

    def _build(self, fixed_depth=None):
        self._build_tree(fixed_depth)

    ###########################################################################
    # Core construction algorithm and helper functions
    ###########################################################################

    # A little bit of manual book-keeping for temporary variables. More
    # specifically, these temporary variables would otherwise be thrown away
    # after building each layer of the tree. We could instead just allocate
    # new arrays after building each layer and let the GC take care of them,
    # but I'm guessing this is a better approach to save on memory.
    def _create_temp_vars(self, temp_vars):
        n = self.n
        temp_vars['pids'] = Array(np.uint32, n=n, backend='opencl')
        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            temp_vars[var] = Array(dtype, n=n, backend='opencl')
        temp_vars['cids'] = Array(np.uint32, n=n, backend='opencl')

    def _exchange_temp_vars(self, temp_vars):
        for k in temp_vars.keys():
            t = temp_vars[k]
            temp_vars[k] = getattr(self, k)
            setattr(self, k, t)

    def _clean_temp_vars(self, temp_vars):
        for k in list(temp_vars.keys()):
            del temp_vars[k]

    def _get_temp_data_args(self, temp_vars):
        result = [temp_vars[v] for v in self.index_function_args]
        return result

    def _reorder_particles(self, depth, child_count_prefix_sum,
                           offsets_parent, pbounds_parent, seg_flag,
                           csum_nodes_prev, temp_vars):
        # Scan
        args = [('__global ' + ctype + ' *' + v) for v, ctype in zip(
            self.index_function_args, self.index_function_arg_ctypes)]
        args += [(ctype + ' ' + v) for v, ctype in zip(
            self.index_function_consts, self.index_function_const_ctypes)]
        args = ', '.join(args)

        particle_kernel = _get_particle_kernel(self.ctx, self.k, args,
                                               self.index_code)
        args = [seg_flag.dev, child_count_prefix_sum.dev]
        args += [x.dev for x in self.get_data_args()]
        args += self.get_index_constants(depth)
        particle_kernel(*args)

        # Reorder particles
        reorder_particles = self.main_helper.get_kernel(
            'reorder_particles', k=self.k,
            data_vars=tuple(self.index_function_args),
            data_var_ctypes=tuple(self.index_function_arg_ctypes),
            const_vars=tuple(self.index_function_consts),
            const_var_ctypes=tuple(self.index_function_const_ctypes),
            index_code=self.index_code)

        args = [
            self.pids.dev, self.cids.dev, seg_flag.dev,
            pbounds_parent.dev, offsets_parent.dev,
            child_count_prefix_sum.dev,
            temp_vars['pids'].dev, temp_vars['cids'].dev
        ]
        args += [x.dev for x in self.get_data_args()]
        args += [x.dev for x in self._get_temp_data_args(temp_vars)]
        args += self.get_index_constants(depth)
        args += [np.uint32(csum_nodes_prev)]
        reorder_particles(*args)

        self._exchange_temp_vars(temp_vars)

    def _merge_layers(self, offsets_temp, pbounds_temp):
        curr_offset = 0
        total_nodes = 0

        for i in range(self.depth + 1):
            total_nodes += self.num_nodes[i]

        self.offsets = Array(np.int32, n=total_nodes, backend='opencl')
        self.pbounds = Array(cl.cltypes.uint2, n=total_nodes,
                             backend='opencl')
        append_layer = self.main_helper.get_kernel('append_layer')

        self.total_nodes = total_nodes
        for i in range(self.depth + 1):
            append_layer(
                offsets_temp[i].dev, pbounds_temp[i].dev,
                self.offsets.dev, self.pbounds.dev,
                np.int32(curr_offset), np.uint8(i == self.depth)
            )
            curr_offset += self.num_nodes[i]

    def _update_node_data(self, offsets_prev, pbounds_prev, offsets, pbounds,
                          seg_flag, child_count_prefix_sum, csum_nodes,
                          csum_nodes_next, n):
        """Update node data and return the number of children which are
        leaves.
        """
        # Update particle-related data of children.
        set_node_data = self.main_helper.get_kernel("set_node_data", k=self.k)
        set_node_data(offsets_prev.dev, pbounds_prev.dev, offsets.dev,
                      pbounds.dev, seg_flag.dev, child_count_prefix_sum.dev,
                      np.uint32(csum_nodes), np.uint32(n))

        # Set children offsets.
        leaf_count = Array(np.uint32, n=1, backend='opencl')
        set_offsets = _get_set_offset_kernel(self.ctx, self.k, self.leaf_size)
        set_offsets(pbounds.dev, offsets.dev, leaf_count.dev,
                    np.uint32(csum_nodes_next))
        return leaf_count.dev[0].get()

    def _build_tree(self, fixed_depth=None):
        # We build the tree one layer at a time. We stop building new layers
        # after either all the nodes are leaves or after reaching the target
        # depth (fixed_depth). At this point, the information for each layer
        # is segmented / not contiguous in memory, so we run a merge_layers
        # procedure to move the data for all layers into a single array.
        #
        # The procedure for building each layer can be split up as follows:
        # 1) Determine which child each particle is going to belong to in the
        #    next layer.
        # 2) Perform a kind of segmented scan over this. This gives us the
        #    new order of the particles so that consecutive particles lie in
        #    the same child.
        # 3) Reorder the particles based on this order.
        # 4) Create a new layer and set the node data for the new layer. We
        #    get to know which particles belong to each node directly from
        #    the results of step 2.
        # 5) Set the predicted offsets of the children of the nodes in the
        #    new layer. If a node has fewer than leaf_size particles, it's a
        #    leaf. A kind of prefix sum over this directly lets us know the
        #    predicted offsets.
        # Rinse and repeat for building more layers.
        #
        # Note that after building the last layer, the predicted offsets for
        # the children might not be correct since we're not going to build
        # more layers. The _merge_layers procedure sets the offsets in the
        # last layer to -1 to correct this.
        num_leaves_here = 0
        n = self.n
        temp_vars = {}

        self.depth = 0
        self.num_nodes = [1]

        # Cumulative sum of nodes in the previous layers.
        csum_nodes_prev = 0
        csum_nodes = 1

        # Initialize temporary data (but persistent across layers).
        self._create_temp_vars(temp_vars)

        child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                       n=n, backend='opencl')

        seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
        seg_flag.fill(0)
        seg_flag.dev[0] = 1

        offsets_temp = [Array(np.int32, n=1, backend='opencl')]
        offsets_temp[-1].fill(1)

        pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
        pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

        # FIXME: Depths above 20 possible and feasible for binary / quad
        # trees. Guard against fixed_depth=None (the default), which would
        # otherwise make min() raise a TypeError.
        loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

        for depth in range(1, loop_lim):
            num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
            if num_nodes == 0:
                break
            else:
                self.depth += 1
            self.num_nodes.append(num_nodes)

            # Allocate new layer.
            offsets_temp.append(
                Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
            pbounds_temp.append(
                Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                      backend='opencl'))

            # Generate particle index and reorder the particles.
            self._reorder_particles(depth, child_count_prefix_sum,
                                    offsets_temp[-2], pbounds_temp[-2],
                                    seg_flag, csum_nodes_prev, temp_vars)

            num_leaves_here = self._update_node_data(
                offsets_temp[-2], pbounds_temp[-2],
                offsets_temp[-1], pbounds_temp[-1],
                seg_flag, child_count_prefix_sum,
                csum_nodes, csum_nodes + self.num_nodes[-1], n
            )

            csum_nodes_prev = csum_nodes
            csum_nodes += self.num_nodes[-1]

        self._merge_layers(offsets_temp, pbounds_temp)
        self._clean_temp_vars(temp_vars)

    ###########################################################################
    # Misc
    ###########################################################################
    def _get_unique_cids_and_count(self):
        n = self.n
        self.unique_cids = Array(np.uint32, n=n, backend='opencl')
        self.unique_cids_map = Array(np.uint32, n=n, backend='opencl')
        uniq_count = Array(np.uint32, n=1, backend='opencl')
        unique_cids_kernel = _get_unique_cids_kernel(self.ctx)
        unique_cids_kernel(self.cids.dev, self.unique_cids_map.dev,
                           self.unique_cids.dev, uniq_count.dev)
        self.unique_cid_count = uniq_count.dev[0].get()

    def get_leaves(self):
        leaves = Array(np.uint32, n=self.offsets.dev.shape[0],
                       backend='opencl')
        num_leaves = Array(np.uint32, n=1, backend='opencl')
        leaves_kernel = _get_leaves_kernel(self.ctx, self.leaf_size)
        leaves_kernel(self.offsets.dev, self.pbounds.dev,
                      leaves.dev, num_leaves.dev)

        num_leaves = num_leaves.dev[0].get()
        return leaves.dev[:num_leaves], num_leaves

    def _sort(self):
        """Set the tree as being sorted.

        The particle array needs to be aligned by the caller!
        """
        if not self.sorted:
            self.sorted = True

    ###########################################################################
    # Tree API
    ###########################################################################
    def allocate_node_prop(self, dtype):
        return Array(dtype, n=self.total_nodes, backend='opencl')

    def allocate_leaf_prop(self, dtype):
        return Array(dtype, n=int(self.unique_cid_count), backend='opencl')

    def get_preamble(self):
        if self.sorted:
            return "#define PID(idx) (idx)"
        else:
            return "#define PID(idx) (pids[idx])"

    def get_leaf_size_partitions(self, group_min, group_max):
        """Partition leaves based on leaf size.

        Parameters
        ----------
        group_min
            Minimum leaf size
        group_max
            Maximum leaf size

        Returns
        -------
        groups : Array
            An array which contains the cell ids of leaves with
            leaf size > group_min and leaf size <= group_max
        group_count : int
            The number of leaves which satisfy the given condition on
            the leaf size
        """
        groups = Array(np.uint32, n=int(self.unique_cid_count),
                       backend='opencl')
        group_count = Array(np.uint32, n=1, backend='opencl')

        get_cid_groups = _get_cid_groups_kernel(self.ctx)
        get_cid_groups(self.unique_cids.dev[:self.unique_cid_count],
                       self.pbounds.dev, groups.dev, group_count.dev,
                       np.int32(group_min), np.int32(group_max))
        result = groups, int(group_count.dev[0].get())
        return result

    def tree_bottom_up(self, args, setup, leaf_operation, node_operation,
                       output_expr, preamble=""):
        return tree_bottom_up(self.ctx, args, setup, leaf_operation,
                              node_operation, output_expr, preamble)

    def leaf_tree_traverse(self, args, setup, node_operation, leaf_operation,
                           output_expr, common_operation="", preamble=""):
        """Traverse this (source) tree. One thread for each leaf of the
        destination tree.
        """
        return leaf_tree_traverse(self.ctx, self.k, args, setup,
                                  node_operation, leaf_operation,
                                  output_expr, common_operation, preamble)

    def point_tree_traverse(self, args, setup, node_operation, leaf_operation,
                            output_expr, common_operation="", preamble=""):
        """Traverse this (source) tree. One thread for each particle of the
        destination tree.
        """
        return point_tree_traverse(self.ctx, self.k, args, setup,
                                   node_operation, leaf_operation,
                                   output_expr, common_operation, preamble)
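# A CPU reference sketch (not part of the original code) of one layer of the
# build described in Tree._build_tree: assign each particle to one of k
# children (step 1), reorder so particles of the same child are contiguous
# (steps 2-3, done here with a stable sort instead of the segmented scan),
# and record each child's particle range as its pbounds (step 4).
import numpy as np

def build_one_layer(child_of, k):
    # child_of[i] in [0, k) is the child index of particle i within its node.
    order = np.argsort(child_of, kind='stable')   # steps 2-3
    counts = np.bincount(child_of, minlength=k)
    ends = np.cumsum(counts)
    starts = ends - counts
    pbounds = np.stack([starts, ends], axis=1)    # step 4: (start, end) pairs
    return order, pbounds

# order, pbounds = build_one_layer(np.array([1, 0, 1, 3]), k=4)
# order -> [1, 0, 2, 3]; pbounds -> [[0, 1], [1, 3], [3, 3], [3, 4]]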