def append_parray(self, parray, align=True, update_constants=False):
    """Append all particles of ``parray`` to this helper.

    Any property present in ``parray`` but missing here is created,
    filled with that property's default value and registered before the
    incoming values are copied in.  With ``update_constants`` set,
    constants unknown to ``self`` are copied over as well.
    """
    if parray.gpu is None:
        parray.set_device_helper(DeviceHelper(parray))

    n_extra = parray.gpu.get_number_of_particles()
    if n_extra == 0:
        return

    n_old = self.get_number_of_particles()
    n_new = n_old + n_extra

    # Make room for the incoming particles.
    self.extend(n_extra)

    my_stride = self._particle_array.stride
    for prop in parray.gpu.properties:
        stride = parray.stride.get(prop, 1)
        # Record strided properties we did not know about yet.
        if stride > 1 and prop not in my_stride:
            my_stride[prop] = stride

        src = parray.gpu.get_device_array(prop)
        if prop not in self.properties:
            # Property only exists on the incoming array: allocate it at
            # full size, fill with its default and register it first.
            new_arr = Array(src.dtype, n=n_new * stride,
                            backend=self.backend)
            new_arr.fill(parray.default_values[prop])
            self.update_prop(prop, new_arr)

        # Copy the incoming values into the tail of our array.
        dest = self._data[prop]
        dest.dev[n_old * stride:] = src.dev

    if update_constants:
        for const in parray.gpu.constants:
            if const not in self.constants:
                c_arr = parray.gpu.get_device_array(const)
                self.update_const(const, c_arr.copy())

    if n_extra > 0 and align:
        self.align_particles()
def remove_particles(self, indices):
    """Remove particles whose indices are given in ``indices``.

    We repeatedly interchange the values of the last element and values
    from the index list and reduce the size of the array by one.  This
    is done for every property that is being maintained.

    Parameters
    ----------
    indices : array
        An array of indices; this can be a list, numpy array or a
        LongArray.

    Raises
    ------
    ValueError
        If more indices are given than there are particles.

    Notes
    -----
    Pseudo-code for the implementation::

        if index_list.length > number of particles
            raise ValueError

        sorted_indices <- index_list sorted in ascending order.

        for every array in property_array
            array.remove(sorted_indices)
    """
    if len(indices) > self.get_number_of_particles():
        # NOTE: the two fragments previously joined as "thannumber";
        # keep a space so the message reads correctly.
        msg = 'Number of particles to be removed is greater than '
        msg += 'number of particles in array'
        raise ValueError(msg)

    num_particles = self.get_number_of_particles()
    # Per-particle boolean flags (1 = remove), filled by the kernel.
    if_remove = Array(np.int32, n=num_particles, backend=self.backend)
    if_remove.fill(0)

    fill_if_remove_knl = self._get_remove_particles_kernel()
    fill_if_remove_knl(indices, if_remove, num_particles)

    self._remove_particles_bool(if_remove)
def remove_particles(self, indices, align=True):
    """Remove particles whose indices are given in ``indices``.

    Parameters
    ----------
    indices : array
        An array of indices; this can be a list, numpy array or a
        LongArray.
    align : bool
        Whether to re-align the particles after removal.

    Raises
    ------
    ValueError
        If more indices are given than there are particles.
    """
    if len(indices) > self.get_number_of_particles():
        # NOTE: the two fragments previously joined as "thannumber";
        # keep a space so the message reads correctly.
        msg = 'Number of particles to be removed is greater than '
        msg += 'number of particles in array'
        raise ValueError(msg)

    num_particles = self.get_number_of_particles()
    # Per-particle boolean flags (1 = remove), filled by the kernel.
    if_remove = Array(np.int32, n=num_particles, backend=self.backend)
    if_remove.fill(0)

    fill_if_remove_knl = self._get_remove_particles_kernel()
    fill_if_remove_knl(indices, if_remove, num_particles)

    self._remove_particles_bool(if_remove, align=align)
def _build_tree(self, fixed_depth=None):
    """Build the tree one layer at a time.

    We stop building new layers after either all the nodes are leaves
    or after reaching the target depth (``fixed_depth``).  While
    building, the information for each layer is segmented / not
    contiguous in memory, so a final ``_merge_layers`` pass moves the
    data of all layers into a single array (and sets the predicted
    child offsets of the last layer to -1, since no further layers are
    built and those predictions would otherwise be wrong).

    Building one layer:
      1) Determine which child each particle will belong to in the next
         layer.
      2) Perform a kind of segmented scan over this, giving the new
         particle order such that consecutive particles lie in the same
         child.
      3) Reorder the particles accordingly.
      4) Create the new layer and set its node data directly from the
         results of step 2.
      5) Set the predicted offsets of the children of the new layer's
         nodes.  A node with fewer than ``leaf_size`` particles is a
         leaf; a prefix sum over this yields the predicted offsets.
    """
    num_leaves_here = 0
    n = self.n
    temp_vars = {}
    self.depth = 0
    self.num_nodes = [1]

    # Cumulative sum of nodes in the previous layers
    csum_nodes_prev = 0
    csum_nodes = 1

    # Initialize temporary data (but persistent across layers)
    self._create_temp_vars(temp_vars)
    child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                   n=n, backend='opencl')

    seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
    seg_flag.fill(0)
    seg_flag.dev[0] = 1

    offsets_temp = [Array(np.int32, n=1, backend='opencl')]
    offsets_temp[-1].fill(1)

    pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
    pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

    # FIXME: Depths above 20 possible and feasible for binary/quad trees
    # BUG FIX: min(None, 20) raises TypeError on Python 3; when no
    # fixed_depth is given, build until every node is a leaf (cap 20).
    loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

    for depth in range(1, loop_lim):
        num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
        if num_nodes == 0:
            # Every node in the previous layer is a leaf; we are done.
            break
        self.depth += 1
        self.num_nodes.append(num_nodes)

        # Allocate new layer
        offsets_temp.append(
            Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
        pbounds_temp.append(
            Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                  backend='opencl'))

        # Generate particle index and reorder the particles
        self._reorder_particles(depth, child_count_prefix_sum,
                                offsets_temp[-2], pbounds_temp[-2],
                                seg_flag, csum_nodes_prev, temp_vars)

        num_leaves_here = self._update_node_data(
            offsets_temp[-2], pbounds_temp[-2],
            offsets_temp[-1], pbounds_temp[-1],
            seg_flag, child_count_prefix_sum,
            csum_nodes, csum_nodes + self.num_nodes[-1], n)

        csum_nodes_prev = csum_nodes
        csum_nodes += self.num_nodes[-1]

    self._merge_layers(offsets_temp, pbounds_temp)
    self._clean_temp_vars(temp_vars)
class Tree(object):
    """A generic k-ary tree built on the GPU (OpenCL backend).

    Subclasses define how particles are mapped to children by
    implementing ``set_index_function_info`` and
    ``get_index_constants``.

    Parameters
    ----------
    n : int
        Number of particles.
    k : int
        Number of children per node (e.g. 8 for an octree).
    leaf_size : int
        Nodes with at most this many particles become leaves.
    """

    def __init__(self, n, k=8, leaf_size=32):
        self.ctx = get_context()
        self.queue = get_queue()
        # NOTE: ``sorted`` was previously assigned False twice here;
        # the duplicate assignment has been removed.
        self.sorted = False
        self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

        self.initialized = False
        self.preamble = ""

        self.leaf_size = leaf_size
        self.k = k
        self.n = n
        self.depth = 0

        # Filled in by the subclass via set_index_function_info().
        self.index_function_args = []
        self.index_function_arg_ctypes = []
        self.index_function_arg_dtypes = []
        self.index_function_consts = []
        self.index_function_const_ctypes = []
        self.index_code = ""

        self.set_index_function_info()

    def set_index_function_info(self):
        """Subclass hook: populate the index_function_* attributes."""
        raise NotImplementedError

    def get_data_args(self):
        """Return the device arrays named by ``index_function_args``."""
        return [getattr(self, v) for v in self.index_function_args]

    def get_index_constants(self, depth):
        """Subclass hook: constants passed to the index kernels."""
        raise NotImplementedError

    def _initialize_data(self):
        """Allocate per-particle device arrays for a fresh build."""
        self.sorted = False
        num_particles = self.n
        self.pids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids.fill(0)

        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            setattr(self, var,
                    Array(dtype, n=num_particles, backend='opencl'))

        # Filled after tree built
        self.pbounds = None
        self.offsets = None
        self.initialized = True

    def _reinitialize_data(self):
        """Resize existing per-particle arrays for a rebuild."""
        self.sorted = False
        num_particles = self.n
        self.pids.resize(num_particles)
        self.cids.resize(num_particles)
        self.cids.fill(0)

        for var in self.index_function_args:
            getattr(self, var).resize(num_particles)

        # Filled after tree built
        self.pbounds = None
        self.offsets = None

    def _setup_build(self):
        """Prepare per-particle data, allocating only on first use."""
        if not self.initialized:
            self._initialize_data()
        else:
            self._reinitialize_data()

    def _build(self, fixed_depth=None):
        self._build_tree(fixed_depth)

    ###########################################################################
    # Core construction algorithm and helper functions
    ###########################################################################
    # A little bit of manual book-keeping for temporary variables.  More
    # specifically, these temporary variables would otherwise be thrown
    # away after building each layer of the tree.  We could instead just
    # allocate new arrays after building each layer and let the GC take
    # care of stuff, but this is presumably a better approach to save on
    # memory.
    def _create_temp_vars(self, temp_vars):
        """Allocate scratch copies of pids/cids/index arrays."""
        n = self.n
        temp_vars['pids'] = Array(np.uint32, n=n, backend='opencl')
        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            temp_vars[var] = Array(dtype, n=n, backend='opencl')
        temp_vars['cids'] = Array(np.uint32, n=n, backend='opencl')

    def _exchange_temp_vars(self, temp_vars):
        """Swap each scratch array with its attribute of the same name."""
        for k in temp_vars.keys():
            t = temp_vars[k]
            temp_vars[k] = getattr(self, k)
            setattr(self, k, t)

    def _clean_temp_vars(self, temp_vars):
        """Drop all scratch arrays so they can be freed."""
        for k in list(temp_vars.keys()):
            del temp_vars[k]

    def _get_temp_data_args(self, temp_vars):
        """Scratch arrays corresponding to ``index_function_args``."""
        result = [temp_vars[v] for v in self.index_function_args]
        return result

    def _reorder_particles(self, depth, child_count_prefix_sum,
                           offsets_parent, pbounds_parent,
                           seg_flag, csum_nodes_prev, temp_vars):
        """Scan child counts and reorder the particles for one layer."""
        # Scan: build the kernel argument signature from the index
        # function's per-particle arrays and constants.
        args = [('__global ' + ctype + ' *' + v) for v, ctype in zip(
            self.index_function_args, self.index_function_arg_ctypes)]
        args += [(ctype + ' ' + v) for v, ctype in zip(
            self.index_function_consts, self.index_function_const_ctypes)]
        args = ', '.join(args)

        particle_kernel = _get_particle_kernel(self.ctx, self.k, args,
                                               self.index_code)
        args = [seg_flag.dev, child_count_prefix_sum.dev]
        args += [x.dev for x in self.get_data_args()]
        args += self.get_index_constants(depth)
        particle_kernel(*args)

        # Reorder particles
        reorder_particles = self.main_helper.get_kernel(
            'reorder_particles', k=self.k,
            data_vars=tuple(self.index_function_args),
            data_var_ctypes=tuple(self.index_function_arg_ctypes),
            const_vars=tuple(self.index_function_consts),
            const_var_ctypes=tuple(self.index_function_const_ctypes),
            index_code=self.index_code)
        args = [self.pids.dev, self.cids.dev, seg_flag.dev,
                pbounds_parent.dev, offsets_parent.dev,
                child_count_prefix_sum.dev,
                temp_vars['pids'].dev, temp_vars['cids'].dev]
        args += [x.dev for x in self.get_data_args()]
        args += [x.dev for x in self._get_temp_data_args(temp_vars)]
        args += self.get_index_constants(depth)
        args += [np.uint32(csum_nodes_prev)]
        reorder_particles(*args)

        # The reordered output lives in the scratch arrays; swap them in.
        self._exchange_temp_vars(temp_vars)

    def _merge_layers(self, offsets_temp, pbounds_temp):
        """Copy per-layer node data into single contiguous arrays."""
        curr_offset = 0
        total_nodes = sum(self.num_nodes[:self.depth + 1])

        self.offsets = Array(np.int32, n=total_nodes, backend='opencl')
        self.pbounds = Array(cl.cltypes.uint2, n=total_nodes,
                             backend='opencl')
        append_layer = self.main_helper.get_kernel('append_layer')

        self.total_nodes = total_nodes
        for i in range(self.depth + 1):
            # The last layer's predicted child offsets are invalid; the
            # kernel marks them as -1 when i == self.depth.
            append_layer(offsets_temp[i].dev, pbounds_temp[i].dev,
                         self.offsets.dev, self.pbounds.dev,
                         np.int32(curr_offset),
                         np.uint8(i == self.depth))
            curr_offset += self.num_nodes[i]

    def _update_node_data(self, offsets_prev, pbounds_prev, offsets,
                          pbounds, seg_flag, child_count_prefix_sum,
                          csum_nodes, csum_nodes_next, n):
        """Update node data and return number of children which are leaves."""
        # Update particle-related data of children
        set_node_data = self.main_helper.get_kernel("set_node_data",
                                                    k=self.k)
        set_node_data(offsets_prev.dev, pbounds_prev.dev, offsets.dev,
                      pbounds.dev, seg_flag.dev,
                      child_count_prefix_sum.dev,
                      np.uint32(csum_nodes), np.uint32(n))

        # Set children offsets
        leaf_count = Array(np.uint32, n=1, backend='opencl')
        set_offsets = _get_set_offset_kernel(self.ctx, self.k,
                                             self.leaf_size)
        set_offsets(pbounds.dev, offsets.dev, leaf_count.dev,
                    np.uint32(csum_nodes_next))
        return leaf_count.dev[0].get()

    def _build_tree(self, fixed_depth=None):
        """Build the tree one layer at a time.

        We stop building new layers after either all the nodes are
        leaves or after reaching the target depth (``fixed_depth``).
        While building, each layer's data is segmented / not contiguous
        in memory, so ``_merge_layers`` finally moves the data of all
        layers into a single array (and sets the predicted child
        offsets of the last layer to -1, since no further layers are
        built and those predictions would otherwise be wrong).

        Building one layer:
          1) Determine which child each particle will belong to in the
             next layer.
          2) Perform a kind of segmented scan over this, giving the new
             particle order so consecutive particles lie in the same
             child.
          3) Reorder the particles accordingly.
          4) Create the new layer and set its node data directly from
             the results of step 2.
          5) Set the predicted offsets of the children of the new
             layer's nodes.  A node with fewer than ``leaf_size``
             particles is a leaf; a prefix sum over this yields the
             predicted offsets.
        """
        num_leaves_here = 0
        n = self.n
        temp_vars = {}
        self.depth = 0
        self.num_nodes = [1]

        # Cumulative sum of nodes in the previous layers
        csum_nodes_prev = 0
        csum_nodes = 1

        # Initialize temporary data (but persistent across layers)
        self._create_temp_vars(temp_vars)
        child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                       n=n, backend='opencl')

        seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
        seg_flag.fill(0)
        seg_flag.dev[0] = 1

        offsets_temp = [Array(np.int32, n=1, backend='opencl')]
        offsets_temp[-1].fill(1)

        pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
        pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

        # FIXME: Depths above 20 possible and feasible for binary/quad
        # trees.
        # BUG FIX: min(None, 20) raises TypeError on Python 3; when no
        # fixed_depth is given, build until every node is a leaf
        # (capped at 20).
        loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

        for depth in range(1, loop_lim):
            num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
            if num_nodes == 0:
                # Every node in the previous layer is a leaf; done.
                break
            self.depth += 1
            self.num_nodes.append(num_nodes)

            # Allocate new layer
            offsets_temp.append(
                Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
            pbounds_temp.append(
                Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                      backend='opencl'))

            # Generate particle index and reorder the particles
            self._reorder_particles(depth, child_count_prefix_sum,
                                    offsets_temp[-2], pbounds_temp[-2],
                                    seg_flag, csum_nodes_prev,
                                    temp_vars)

            num_leaves_here = self._update_node_data(
                offsets_temp[-2], pbounds_temp[-2],
                offsets_temp[-1], pbounds_temp[-1],
                seg_flag, child_count_prefix_sum,
                csum_nodes, csum_nodes + self.num_nodes[-1], n)

            csum_nodes_prev = csum_nodes
            csum_nodes += self.num_nodes[-1]

        self._merge_layers(offsets_temp, pbounds_temp)
        self._clean_temp_vars(temp_vars)

    ###########################################################################
    # Misc
    ###########################################################################
    def _get_unique_cids_and_count(self):
        """Compute the distinct cell ids and how many there are."""
        n = self.n
        self.unique_cids = Array(np.uint32, n=n, backend='opencl')
        self.unique_cids_map = Array(np.uint32, n=n, backend='opencl')
        uniq_count = Array(np.uint32, n=1, backend='opencl')
        unique_cids_kernel = _get_unique_cids_kernel(self.ctx)
        unique_cids_kernel(self.cids.dev, self.unique_cids_map.dev,
                           self.unique_cids.dev, uniq_count.dev)
        self.unique_cid_count = uniq_count.dev[0].get()

    def get_leaves(self):
        """Return (device array of leaf node ids, number of leaves)."""
        leaves = Array(np.uint32, n=self.offsets.dev.shape[0],
                       backend='opencl')
        num_leaves = Array(np.uint32, n=1, backend='opencl')
        leaves_kernel = _get_leaves_kernel(self.ctx, self.leaf_size)
        leaves_kernel(self.offsets.dev, self.pbounds.dev,
                      leaves.dev, num_leaves.dev)

        num_leaves = num_leaves.dev[0].get()
        return leaves.dev[:num_leaves], num_leaves

    def _sort(self):
        """Set tree as being sorted

        The particle array needs to be aligned by the caller!
        """
        if not self.sorted:
            # CONSISTENCY: use a bool (was ``1``) to match the False
            # assignments elsewhere; truthiness behaviour is unchanged.
            self.sorted = True

    ###########################################################################
    # Tree API
    ###########################################################################
    def allocate_node_prop(self, dtype):
        """Allocate one value of ``dtype`` per tree node."""
        return Array(dtype, n=self.total_nodes, backend='opencl')

    def allocate_leaf_prop(self, dtype):
        """Allocate one value of ``dtype`` per unique cell id."""
        return Array(dtype, n=int(self.unique_cid_count),
                     backend='opencl')

    def get_preamble(self):
        """Kernel preamble mapping logical to physical particle ids."""
        if self.sorted:
            return "#define PID(idx) (idx)"
        else:
            return "#define PID(idx) (pids[idx])"

    def get_leaf_size_partitions(self, group_min, group_max):
        """Partition leaves based on leaf size

        Parameters
        ----------
        group_min
            Minimum leaf size
        group_max
            Maximum leaf size

        Returns
        -------
        groups : Array
            An array which contains the cell ids of leaves with leaf
            size > group_min and leaf size <= group_max
        group_count : int
            The number of leaves which satisfy the given condition on
            the leaf size
        """
        groups = Array(np.uint32, n=int(self.unique_cid_count),
                       backend='opencl')
        group_count = Array(np.uint32, n=1, backend='opencl')

        get_cid_groups = _get_cid_groups_kernel(self.ctx)
        get_cid_groups(self.unique_cids.dev[:self.unique_cid_count],
                       self.pbounds.dev, groups.dev, group_count.dev,
                       np.int32(group_min), np.int32(group_max))
        result = groups, int(group_count.dev[0].get())
        return result

    def tree_bottom_up(self, args, setup, leaf_operation, node_operation,
                       output_expr, preamble=""):
        """Run a bottom-up traversal over this tree."""
        return tree_bottom_up(self.ctx, args, setup, leaf_operation,
                              node_operation, output_expr, preamble)

    def leaf_tree_traverse(self, args, setup, node_operation,
                           leaf_operation, output_expr,
                           common_operation="", preamble=""):
        """
        Traverse this (source) tree.

        One thread for each leaf of destination tree.
        """
        return leaf_tree_traverse(self.ctx, self.k, args, setup,
                                  node_operation, leaf_operation,
                                  output_expr, common_operation,
                                  preamble)

    def point_tree_traverse(self, args, setup, node_operation,
                            leaf_operation, output_expr,
                            common_operation="", preamble=""):
        """
        Traverse this (source) tree.

        One thread for each particle of destination tree.
        """
        return point_tree_traverse(self.ctx, self.k, args, setup,
                                   node_operation, leaf_operation,
                                   output_expr, common_operation,
                                   preamble)