def get_leaf_size_partitions(self, group_min, group_max):
    """Partition leaves based on leaf size

    Parameters
    ----------
    group_min
        Minimum leaf size
    group_max
        Maximum leaf size

    Returns
    -------
    groups : Array
        An array which contains the cell ids of leaves with
        leaf size > group_min and leaf size <= group_max
    group_count : int
        The number of leaves which satisfy the given condition
        on the leaf size
    """
    num_unique = int(self.unique_cid_count)
    groups = Array(np.uint32, n=num_unique, backend='opencl')
    group_count = Array(np.uint32, n=1, backend='opencl')

    # Kernel scans pbounds of each unique leaf and keeps those whose
    # particle count falls in (group_min, group_max].
    partition_knl = _get_cid_groups_kernel(self.ctx)
    partition_knl(self.unique_cids.dev[:num_unique], self.pbounds.dev,
                  groups.dev, group_count.dev,
                  np.int32(group_min), np.int32(group_max))
    return groups, int(group_count.dev[0].get())
def _create_temp_vars(self, temp_vars):
    """Fill *temp_vars* with scratch device arrays of size ``self.n``.

    Allocates 'pids', 'cids' and one array per index-function argument;
    these persist across tree-construction layers.
    """
    count = self.n
    temp_vars['pids'] = Array(np.uint32, n=count, backend='opencl')
    for name, dtype in zip(self.index_function_args,
                           self.index_function_arg_dtypes):
        temp_vars[name] = Array(dtype, n=count, backend='opencl')
    temp_vars['cids'] = Array(np.uint32, n=count, backend='opencl')
def _get_unique_cids_and_count(self):
    """Compute the unique cell ids, a map into them, and their count.

    Sets ``self.unique_cids``, ``self.unique_cids_map`` and
    ``self.unique_cid_count`` from the per-particle ``self.cids``.
    """
    count = self.n
    self.unique_cids = Array(np.uint32, n=count, backend='opencl')
    self.unique_cids_map = Array(np.uint32, n=count, backend='opencl')
    uniq_count = Array(np.uint32, n=1, backend='opencl')

    kernel = _get_unique_cids_kernel(self.ctx)
    kernel(self.cids.dev, self.unique_cids_map.dev,
           self.unique_cids.dev, uniq_count.dev)
    self.unique_cid_count = uniq_count.dev[0].get()
def get_leaves(self):
    """Return (device slice of leaf cids, number of leaves).

    A node is identified as a leaf by the kernel using the tree's
    offsets/pbounds data and ``self.leaf_size``.
    """
    max_nodes = self.offsets.dev.shape[0]
    leaves = Array(np.uint32, n=max_nodes, backend='opencl')
    leaf_count = Array(np.uint32, n=1, backend='opencl')

    kernel = _get_leaves_kernel(self.ctx, self.leaf_size)
    kernel(self.offsets.dev, self.pbounds.dev, leaves.dev,
           leaf_count.dev)

    count = leaf_count.dev[0].get()
    return leaves.dev[:count], count
def _initialize_data(self):
    """Allocate per-particle device arrays; node data is deferred.

    ``pbounds`` and ``offsets`` stay ``None`` until the tree is built.
    """
    self.sorted = False
    count = self.n

    self.pids = Array(np.uint32, n=count, backend='opencl')
    self.cids = Array(np.uint32, n=count, backend='opencl')
    self.cids.fill(0)

    # One device array per index-function argument.
    for name, dtype in zip(self.index_function_args,
                           self.index_function_arg_dtypes):
        setattr(self, name, Array(dtype, n=count, backend='opencl'))

    # Filled after tree built
    self.pbounds = None
    self.offsets = None

    self.initialized = True
def _merge_layers(self, offsets_temp, pbounds_temp):
    """Concatenate the per-layer node data into single contiguous
    ``offsets`` and ``pbounds`` device arrays.

    The last layer is flagged so the kernel can reset its (invalid)
    predicted child offsets.
    """
    total_nodes = sum(self.num_nodes[layer]
                      for layer in range(self.depth + 1))

    self.offsets = Array(np.int32, n=total_nodes, backend='opencl')
    self.pbounds = Array(cl.cltypes.uint2, n=total_nodes,
                         backend='opencl')
    append_layer = self.main_helper.get_kernel('append_layer')

    self.total_nodes = total_nodes
    write_offset = 0
    for layer in range(self.depth + 1):
        is_last = (layer == self.depth)
        append_layer(offsets_temp[layer].dev, pbounds_temp[layer].dev,
                     self.offsets.dev, self.pbounds.dev,
                     np.int32(write_offset), np.uint8(is_last))
        write_offset += self.num_nodes[layer]
def _remove_particles_bool(self, if_remove, align=True):
    """ Remove particle i if if_remove[i] is True
    """
    removed_total = int(array.sum(if_remove, backend=self.backend))
    if removed_total == 0:
        return

    num_particles = self.get_number_of_particles()
    kept = num_particles - removed_total
    new_indices = Array(np.uint32, n=kept, backend=self.backend)
    num_removed_particles = array.empty(1, dtype=np.int32,
                                        backend=self.backend)

    remove_knl, stride_knl = self._get_remove_particles_bool_kernels()

    # Scan kernel: compacts the indices of surviving particles into
    # new_indices and reports how many were removed.
    remove_knl(if_remove=if_remove, new_indices=new_indices,
               num_removed_particles=num_removed_particles,
               num_particles=num_particles)

    new_num_particles = num_particles - int(num_removed_particles.get())

    # Build one expanded index map per distinct stride so strided
    # properties can be realigned in a single pass each.
    s_indices = {1: new_indices}
    for stride in set(self._particle_array.stride.values()):
        if stride == 1:
            continue
        size = new_num_particles * stride
        strided_index = Array(np.uint32, n=size, backend=self.backend)
        stride_knl(new_indices, strided_index, size, stride)
        s_indices[stride] = strided_index

    for prop in self.properties:
        stride = self._particle_array.stride.get(prop, 1)
        self._data[prop].align(s_indices[stride])
        setattr(self, prop, self._data[prop])

    if align:
        self.align_particles()
def _add_prop_or_const(self, name, carray):
    """Add a new property or constant given the name and carray, note
    that this assumes that this property is already added to the
    particle array.
    """
    host_data = self._get_array(carray)
    device_array = Array(host_data.dtype, n=carray.length,
                         backend=self.backend)
    # Copy host values to the device and register under both the data
    # dict and an attribute of the same name.
    device_array.set(host_data)
    self._data[name] = device_array
    setattr(self, name, device_array)
def find_neighbor_cids(self, tree_src):
    """For each unique leaf of this tree, collect the cids of the
    non-empty leaves of ``tree_src`` that neighbor it (two-pass
    count-then-fill scheme).

    Returns
    -------
    neighbor_cid_count : Array
        uint32 array of length ``unique_cid_count + 1``; after the
        prefix sum, entry i is the start offset of leaf i's neighbors
        in ``neighbor_cids`` and the last entry is the total count.
    neighbor_cids : Array
        Flattened uint32 array of neighboring source-leaf cids.
    """
    neighbor_cid_count = Array(np.uint32, n=self.unique_cid_count + 1,
                               backend='opencl')
    # Pass 1: count neighboring source leaves that actually contain
    # particles (pbounds .s0 < .s1 means non-empty).
    find_neighbor_cid_counts = self._leaf_neighbor_operation(
        tree_src,
        args="uint2 *pbounds, int *cnt",
        setup="int count=0",
        operation="""
        if (pbounds[cid_src].s0 < pbounds[cid_src].s1)
            count++;
        """,
        output_expr="cnt[i] = count;"
    )
    find_neighbor_cid_counts = profile_kernel(
        find_neighbor_cid_counts, 'find_neighbor_cid_count',
        backend='opencl'
    )
    find_neighbor_cid_counts(tree_src.pbounds.dev,
                             neighbor_cid_count.dev)

    # Prefix sum over the counts turns them into write offsets; the
    # last entry then holds the total number of neighbor pairs.
    neighbor_psum = _get_neighbor_count_prefix_sum_kernel(self.ctx)
    neighbor_psum(neighbor_cid_count.dev)

    total_neighbors = int(neighbor_cid_count.dev[-1].get())
    neighbor_cids = Array(np.uint32, n=total_neighbors,
                          backend='opencl')

    # Pass 2: write each leaf's neighbor cids starting at the offsets
    # computed above (cnt[i] from pass 1's prefix sum).
    find_neighbor_cids = self._leaf_neighbor_operation(
        tree_src,
        args="uint2 *pbounds, int *cnt, int *neighbor_cids",
        setup="int offset=cnt[i];",
        operation="""
        if (pbounds[cid_src].s0 < pbounds[cid_src].s1)
            neighbor_cids[offset++] = cid_src;
        """,
        output_expr=""
    )
    find_neighbor_cids = profile_kernel(
        find_neighbor_cids, 'find_neighbor_cids', backend='opencl')
    find_neighbor_cids(tree_src.pbounds.dev, neighbor_cid_count.dev,
                       neighbor_cids.dev)
    return neighbor_cid_count, neighbor_cids
def append_parray(self, parray, align=True, update_constants=False):
    """ Add particles from a particle array

    properties that are not there in self will be added
    """
    # Make sure the incoming array has a device-side helper.
    if parray.gpu is None:
        parray.set_device_helper(DeviceHelper(parray))

    if parray.gpu.get_number_of_particles() == 0:
        return

    num_extra_particles = parray.gpu.get_number_of_particles()
    old_num_particles = self.get_number_of_particles()
    new_num_particles = num_extra_particles + old_num_particles

    # extend current arrays by the required number of particles
    self.extend(num_extra_particles)

    my_stride = self._particle_array.stride
    for prop_name in parray.gpu.properties:
        stride = parray.stride.get(prop_name, 1)
        # Adopt the source's stride for props we don't have a stride
        # for yet.
        if stride > 1 and prop_name not in my_stride:
            my_stride[prop_name] = stride
        if prop_name in self.properties:
            # Existing property: copy source values into the extended
            # tail region.
            arr = self._data[prop_name]
            source = parray.gpu.get_device_array(prop_name)
            arr.dev[old_num_particles * stride:] = source.dev
        else:
            # meaning this property is not there in self.
            dtype = parray.gpu.get_device_array(prop_name).dtype
            arr = Array(dtype, n=new_num_particles * stride,
                        backend=self.backend)
            # Old particles get the property's default value.
            arr.fill(parray.default_values[prop_name])
            self.update_prop(prop_name, arr)

            # now add the values to the end of the created array
            dest = self._data[prop_name]
            source = parray.gpu.get_device_array(prop_name)
            dest.dev[old_num_particles * stride:] = source.dev

    if update_constants:
        # Only copy constants we don't already have.
        for const in parray.gpu.constants:
            if const not in self.constants:
                arr = parray.gpu.get_device_array(const)
                self.update_const(const, arr.copy())

    if num_extra_particles > 0 and align:
        self.align_particles()
def _update_node_data(self, offsets_prev, pbounds_prev, offsets, pbounds,
                      seg_flag, child_count_prefix_sum, csum_nodes,
                      csum_nodes_next, n):
    """Update node data and return number of children which are leaves."""
    # Update particle-related data of children
    node_data_knl = self.main_helper.get_kernel("set_node_data",
                                                k=self.k)
    node_data_knl(offsets_prev.dev, pbounds_prev.dev, offsets.dev,
                  pbounds.dev, seg_flag.dev, child_count_prefix_sum.dev,
                  np.uint32(csum_nodes), np.uint32(n))

    # Set children offsets; the kernel also tallies how many of the
    # children turned out to be leaves.
    leaf_count = Array(np.uint32, n=1, backend='opencl')
    offsets_knl = _get_set_offset_kernel(self.ctx, self.k,
                                         self.leaf_size)
    offsets_knl(pbounds.dev, offsets.dev, leaf_count.dev,
                np.uint32(csum_nodes_next))
    return leaf_count.dev[0].get()
def remove_particles(self, indices):
    """ Remove particles whose indices are given in index_list.

    We repeatedly interchange the values of the last element and
    values from the index_list and reduce the size of the array by
    one. This is done for every property that is being maintained.

    Parameters
    ----------
    indices : array
        an array of indices, this array can be a list, numpy array or
        a LongArray.

    Raises
    ------
    ValueError
        If more indices are given than there are particles.

    Notes
    -----
    Pseudo-code for the implementation::

        if index_list.length > number of particles
            raise ValueError

        sorted_indices <- index_list sorted in ascending order.

        for every every array in property_array
            array.remove(sorted_indices)
    """
    if len(indices) > self.get_number_of_particles():
        # BUG FIX: the two fragments used to concatenate without a
        # space, producing "greater thannumber of particles".
        msg = 'Number of particles to be removed is greater than '
        msg += 'number of particles in array'
        raise ValueError(msg)

    num_particles = self.get_number_of_particles()
    # Build a 0/1 mask of particles to drop, then delegate to the
    # boolean-mask removal path.
    if_remove = Array(np.int32, n=num_particles, backend=self.backend)
    if_remove.fill(0)

    fill_if_remove_knl = self._get_remove_particles_kernel()
    fill_if_remove_knl(indices, if_remove, num_particles)

    self._remove_particles_bool(if_remove)
def remove_particles(self, indices, align=True):
    """ Remove particles whose indices are given in index_list.

    Parameters
    ----------
    indices : array
        an array of indices, this array can be a list, numpy array or
        a LongArray.
    align : bool
        if True, realign the particle data after removal.

    Raises
    ------
    ValueError
        If more indices are given than there are particles.
    """
    if len(indices) > self.get_number_of_particles():
        # BUG FIX: the two fragments used to concatenate without a
        # space, producing "greater thannumber of particles".
        msg = 'Number of particles to be removed is greater than '
        msg += 'number of particles in array'
        raise ValueError(msg)

    num_particles = self.get_number_of_particles()
    # Build a 0/1 mask of particles to drop, then delegate to the
    # boolean-mask removal path.
    if_remove = Array(np.int32, n=num_particles, backend=self.backend)
    if_remove.fill(0)

    fill_if_remove_knl = self._get_remove_particles_kernel()
    fill_if_remove_knl(indices, if_remove, num_particles)

    self._remove_particles_bool(if_remove, align=align)
def allocate_leaf_prop(self, dtype):
    """Allocate a device array with one slot per unique leaf cid."""
    num_leaves = int(self.unique_cid_count)
    return Array(dtype, n=num_leaves, backend='opencl')
def _build_tree(self, fixed_depth=None):
    """Build the tree one layer at a time.

    We stop building new layers after either all the nodes are leaves
    or after reaching the target depth (``fixed_depth``). At that
    point, the information for each layer is segmented / not
    contiguous in memory, so a ``_merge_layers`` procedure moves the
    data for all layers into a single array.

    The procedure for building each layer can be split up as follows:

    1) Determine which child each particle is going to belong to in
       the next layer.
    2) Perform a kind of segmented scan over this. This gives us the
       new order of the particles so that consecutive particles lie
       in the same child.
    3) Reorder the particles based on this order.
    4) Create a new layer and set the node data for the new layer. We
       get to know which particles belong to each node directly from
       the results of step 2.
    5) Set the predicted offsets of the children of the nodes in the
       new layer. If a node has fewer than leaf_size particles, it's
       a leaf. A kind of prefix sum over this directly lets us know
       the predicted offsets.

    Rinse and repeat for building more layers.

    Note that after building the last layer, the predicted offsets
    for the children might not be correct since we're not going to
    build more layers. The ``_merge_layers`` procedure sets the
    offsets in the last layer to -1 to correct this.

    Parameters
    ----------
    fixed_depth : int or None
        Maximum number of layers to build; ``None`` means build until
        all nodes are leaves (subject to the hard cap below).
    """
    num_leaves_here = 0
    n = self.n
    temp_vars = {}

    self.depth = 0
    self.num_nodes = [1]

    # Cumulative sum of nodes in the previous layers
    csum_nodes_prev = 0
    csum_nodes = 1

    # Initialize temporary data (but persistent across layers)
    self._create_temp_vars(temp_vars)

    child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                   n=n, backend='opencl')

    # Segment flags mark the first particle of each node's range.
    seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
    seg_flag.fill(0)
    seg_flag.dev[0] = 1

    # Root layer: one node covering all n particles.
    offsets_temp = [Array(np.int32, n=1, backend='opencl')]
    offsets_temp[-1].fill(1)

    pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
    pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

    # FIXME: Depths above 20 possible and feasible for binary / quad
    # trees.
    # BUG FIX: the default fixed_depth=None used to be passed straight
    # to min(), which raises TypeError on Python 3. Treat None as "no
    # caller-imposed limit" (hard cap of 20 still applies).
    loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

    for depth in range(1, loop_lim):
        num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
        if num_nodes == 0:
            # Every node in the previous layer was a leaf; done.
            break
        else:
            self.depth += 1
        self.num_nodes.append(num_nodes)

        # Allocate new layer
        offsets_temp.append(
            Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
        pbounds_temp.append(
            Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                  backend='opencl'))

        # Generate particle index and reorder the particles
        self._reorder_particles(depth, child_count_prefix_sum,
                                offsets_temp[-2], pbounds_temp[-2],
                                seg_flag, csum_nodes_prev, temp_vars)

        num_leaves_here = self._update_node_data(
            offsets_temp[-2], pbounds_temp[-2],
            offsets_temp[-1], pbounds_temp[-1],
            seg_flag, child_count_prefix_sum,
            csum_nodes, csum_nodes + self.num_nodes[-1], n)

        csum_nodes_prev = csum_nodes
        csum_nodes += self.num_nodes[-1]

    self._merge_layers(offsets_temp, pbounds_temp)
    self._clean_temp_vars(temp_vars)
def _allocate_memory(self, pa_gpu):
    """Allocate one device array per name in ``self.varnames``, sized
    like the first variable's array on *pa_gpu*."""
    size = getattr(pa_gpu, self.varnames[0]).dev.shape[0]
    dtype = ctype_to_dtype(self.c_type)
    for name in self.varnames:
        setattr(self, name, Array(dtype, n=size, backend='opencl'))
def _create_ghosts_periodic(self):
    """Identify boundary particles and create images. We need to find
    all particles that are within a specified distance from the
    boundaries and place image copies on the other side of the
    boundary. Corner reflections need to be accounted for when using
    domains with multiple periodicity.

    The periodic domain is specified using the DomainManager object
    """
    copy_props = self.copy_props
    pa_wrappers = self.pa_wrappers
    narrays = self.narrays

    # cell size used to check for periodic ghosts. For summation density
    # like operations, we need to create two layers of ghost images, this
    # is configurable via the n_layers argument to the constructor.
    cell_size = self.n_layers * self.cell_size

    # periodic domain values
    xmin, xmax = self.xmin, self.xmax
    ymin, ymax = self.ymin, self.ymax
    zmin, zmax = self.zmin, self.zmax

    xtranslate = self.xtranslate
    ytranslate = self.ytranslate
    ztranslate = self.ztranslate

    # periodicity flags
    periodic_in_x = self.periodic_in_x
    periodic_in_y = self.periodic_in_y
    periodic_in_z = self.periodic_in_z

    reduce_knl = self._get_ghosts_reduction_kernel()
    scan_knl = self._get_ghosts_scan_kernel()
    translate_knl = self._get_translate_kernel()

    if not self.ghosts:
        # First call: create one empty ghost array per source array.
        self.ghosts = [
            paw.pa.empty_clone(props=copy_props[i])
            for i, paw in enumerate(pa_wrappers)
        ]
    else:
        # Reuse existing ghost arrays: empty them and make sure their
        # property set matches the source arrays.
        for ghost_pa in self.ghosts:
            ghost_pa.resize(0)
        for i in range(narrays):
            self.ghosts[i].ensure_properties(
                pa_wrappers[i].pa, props=copy_props[i])

    for i, pa_wrapper in enumerate(self.pa_wrappers):
        ghost_pa = self.ghosts[i]
        x = pa_wrapper.pa.gpu.x
        y = pa_wrapper.pa.gpu.y
        z = pa_wrapper.pa.gpu.z
        # Reduction: count how many particles lie within cell_size of
        # a periodic boundary and so need an image copy.
        num_extra_particles = reduce_knl(x, y, z, xmin, ymin, zmin,
                                         xmax, ymax, zmax, cell_size,
                                         periodic_in_x, periodic_in_y,
                                         periodic_in_z)
        num_extra_particles = int(num_extra_particles)
        # NOTE(review): unlike other allocations in this file, no
        # backend= is passed here — presumably the Array default
        # matches the active backend; confirm.
        indices = Array(np.int32, n=num_extra_particles)
        masks = Array(np.int32, n=num_extra_particles)
        # Scan: fill indices of boundary particles and per-particle
        # masks describing which boundary/corner image to create.
        scan_knl(periodic_in_x=periodic_in_x,
                 periodic_in_y=periodic_in_y,
                 periodic_in_z=periodic_in_z,
                 x=x, y=y, z=z, xmin=xmin, ymin=ymin, zmin=zmin,
                 xmax=xmax, ymax=ymax, zmax=zmax,
                 cell_size=cell_size, masks=masks, indices=indices)

        pa_wrapper.pa.extract_particles(indices, ghost_pa, align=False,
                                        props=copy_props[i])

        # Shift the extracted copies across the domain according to
        # their masks.
        translate_knl(ghost_pa.gpu.x, ghost_pa.gpu.y, ghost_pa.gpu.z,
                      ghost_pa.gpu.tag, xtranslate, ytranslate,
                      ztranslate, masks)

        pa_wrapper.pa.append_parray(ghost_pa, align=False)
def allocate_node_prop(self, dtype):
    """Allocate a device array with one slot per tree node."""
    num_nodes = self.total_nodes
    return Array(dtype, n=num_nodes, backend='opencl')