def set_node_bounds(self):
    """Allocate the per-node bound properties and run the bound kernel.

    Three node properties are allocated with ``allocate_node_prop`` and
    stored on the instance:

    * ``node_xmin`` and ``node_xmax`` -- vector dtype obtained from
      ``get_vector_dtype(self.c_type, self.dim)``;
    * ``node_hmax`` -- scalar dtype obtained from
      ``ctype_to_dtype(self.c_type)``.

    A bottom-up tree kernel is then built from the pieces returned by
    ``_get_node_bound_kernel_parameters``, wrapped with ``profile_kernel``
    and called with ``self``, the particle ids, the coordinate arrays named
    in ``self.xvars``, ``pa.gpu.h``, the radius scale and the three
    properties allocated above.
    """
    vector_dtype = get_vector_dtype(self.c_type, self.dim)
    # Resolved once; used both for the ``node_hmax`` allocation and for
    # casting ``radius_scale`` before it is handed to the kernel.
    scalar_dtype = ctype_to_dtype(self.c_type)

    self.node_xmin = self.allocate_node_prop(vector_dtype)
    self.node_xmax = self.allocate_node_prop(vector_dtype)
    self.node_hmax = self.allocate_node_prop(scalar_dtype)

    params = _get_node_bound_kernel_parameters(
        self.dim, self.c_type, self.xvars
    )
    bounds_kernel = self.tree_bottom_up(
        params['args'],
        params['setup'],
        params['leaf_operation'],
        params['node_operation'],
        params['output_expr'],
        preamble=_get_macros_preamble(self.c_type, self.sorted, self.dim),
    )
    bounds_kernel = profile_kernel(
        bounds_kernel, 'set_node_bounds', backend='opencl'
    )

    pa_gpu = self.pa.gpu

    # Argument order must match the kernel signature described by
    # ``params['args']``: tree, particle ids, one buffer per coordinate
    # variable, then h, radius scale and the output properties.
    args = [self, self.pids.dev]
    args += [getattr(pa_gpu, v).dev for v in self.xvars]
    args += [
        pa_gpu.h.dev,
        scalar_dtype(self.radius_scale),
        self.node_xmin.dev,
        self.node_xmax.dev,
        self.node_hmax.dev,
    ]

    bounds_kernel(*args)
def find_neighbor_lengths_elementwise(self, neighbor_cid_count,
                                      neighbor_cids, tree_src,
                                      neighbor_count):
    """Launch the ``find_neighbor_counts_elementwise`` kernel.

    ``self`` supplies the destination-side buffers and ``tree_src`` the
    source-side buffers.  The call is first validated with
    ``check_nnps_compatibility``.

    Parameters
    ----------
    neighbor_cid_count, neighbor_cids, neighbor_count
        Array wrappers whose ``.dev`` buffers are passed to the kernel.
    tree_src
        Source tree; its ``pids``, ``pbounds`` and ``pa.gpu`` arrays are
        passed to the kernel.
    """
    self.check_nnps_compatibility(tree_src)

    dst_arrays = self.pa.gpu
    src_arrays = tree_src.pa.gpu
    scalar_t = ctype_to_dtype(self.c_type)

    kernel = self.helper.get_kernel(
        'find_neighbor_counts_elementwise', sorted=self.sorted
    )

    # Positional order is dictated by the kernel signature; source-side
    # buffers come before the matching destination-side ones.
    kernel_args = (
        self.unique_cids_map.dev,
        tree_src.pids.dev,
        self.pids.dev,
        self.cids.dev,
        tree_src.pbounds.dev,
        self.pbounds.dev,
        src_arrays.x.dev,
        src_arrays.y.dev,
        src_arrays.z.dev,
        src_arrays.h.dev,
        dst_arrays.x.dev,
        dst_arrays.y.dev,
        dst_arrays.z.dev,
        dst_arrays.h.dev,
        scalar_t(self.radius_scale),
        neighbor_cid_count.dev,
        neighbor_cids.dev,
        neighbor_count.dev,
    )
    kernel(*kernel_args)
def find_neighbor_lengths(self, neighbor_cid_count, neighbor_cids, tree_src,
                          neighbor_count, use_partitions=False):
    """Launch the ``find_neighbor_counts`` kernel, optionally in two parts.

    The work-group size is ``self.leaf_size``.  When ``use_partitions`` is
    true and that size exceeds 32, the leaves are obtained in two groups
    from ``get_leaf_size_partitions`` and the kernel is launched once per
    group, the first with a smaller local size (32, or 64 when the
    work-group size is at least 128).  Otherwise a single launch covers
    ``self.unique_cids``.

    Parameters
    ----------
    neighbor_cid_count, neighbor_cids, neighbor_count
        Array wrappers whose ``.dev`` buffers are passed to the kernel.
    tree_src
        Source tree; validated with ``check_nnps_compatibility``.
    use_partitions : bool
        Enable the two-launch path described above.
    """
    self.check_nnps_compatibility(tree_src)

    wgs = self.leaf_size
    dst_arrays = self.pa.gpu
    src_arrays = tree_src.pa.gpu
    scalar_t = ctype_to_dtype(self.c_type)

    def launch(cids, count, local_size, q=None):
        # The kernel is always requested with the full ``wgs``; only the
        # launch geometry changes between partitions.
        kernel = self.helper.get_kernel(
            'find_neighbor_counts', sorted=self.sorted, wgs=wgs
        )
        kernel(
            cids.dev,
            tree_src.pids.dev,
            self.pids.dev,
            self.cids.dev,
            tree_src.pbounds.dev,
            self.pbounds.dev,
            src_arrays.x.dev,
            src_arrays.y.dev,
            src_arrays.z.dev,
            src_arrays.h.dev,
            dst_arrays.x.dev,
            dst_arrays.y.dev,
            dst_arrays.z.dev,
            dst_arrays.h.dev,
            scalar_t(self.radius_scale),
            neighbor_cid_count.dev,
            neighbor_cids.dev,
            neighbor_count.dev,
            # One work-group of ``local_size`` items per entry in ``cids``.
            gs=(local_size * count, ),
            ls=(local_size, ),
            queue=(get_queue() if q is None else q),
        )

    if not (use_partitions and wgs > 32):
        launch(self.unique_cids, self.unique_cid_count, wgs)
        return

    wgs1 = 32 if wgs < 128 else 64

    first_cids, first_count = self.get_leaf_size_partitions(0, wgs1)
    launch(first_cids, first_count, min(wgs, wgs1))

    second_cids, second_count = self.get_leaf_size_partitions(wgs1, wgs)
    launch(second_cids, second_count, wgs)
def _bin(self):
    """Run the ``fill_particle_data`` kernel for this object's particles.

    The kernel is obtained from ``self.helper`` for the current ``dim`` and
    ``xvars`` and is called with, in order: one device buffer per
    coordinate variable in ``self.xvars``, the cell size cast to the scalar
    dtype of ``self.c_type``, a vector built by ``make_vec`` from the first
    ``self.dim`` entries of ``self.xmin``, and the ``sfc`` and ``pids``
    device buffers.
    """
    scalar_t = ctype_to_dtype(self.c_type)
    kernel = self.helper.get_kernel(
        "fill_particle_data", dim=self.dim, xvars=self.xvars
    )
    particles = self.pa.gpu

    kernel_args = [getattr(particles, name).dev for name in self.xvars]

    cell_size = scalar_t(self.cell_size)
    # Only the first ``dim`` components of ``xmin`` are forwarded.
    xmin_vec = self.make_vec(*(self.xmin[axis] for axis in range(self.dim)))
    kernel_args.extend([cell_size, xmin_vec, self.sfc.dev, self.pids.dev])

    kernel(*kernel_args)
def find_neighbors(self, neighbor_cid_count, neighbor_cids, tree_src,
                   start_indices, neighbors, use_partitions=False):
    """Launch the ``find_neighbors`` kernel, optionally in two parts.

    The work-group size is ``self.leaf_size`` rounded up to the next
    multiple of 32.  When ``use_partitions`` is true, that size exceeds 32
    and more than 30% of the unique leaves fall in the first group
    returned by ``get_leaf_size_partitions``, the kernel is launched once
    per group, the first with a smaller local size (32, or 64 when the
    work-group size is at least 128).  In every other case a single launch
    covers ``self.unique_cids``.

    Previously the single launch was skipped when partitioning was
    requested but the 30% threshold was not met, so no kernel ran at all.
    It is now the fall-through for every call that does not take the
    partitioned path.

    Parameters
    ----------
    neighbor_cid_count, neighbor_cids, start_indices, neighbors
        Array wrappers whose ``.dev`` buffers are passed to the kernel.
    tree_src
        Source tree; validated with ``check_nnps_compatibility``.
    use_partitions : bool
        Enable the two-launch path described above.

    Raises
    ------
    AssertionError
        If the two partitions together do not account for
        ``self.unique_cid_count`` leaves.
    """
    self.check_nnps_compatibility(tree_src)

    # Round the leaf size up to a multiple of 32.
    remainder = self.leaf_size % 32
    wgs = self.leaf_size if remainder == 0 else \
        self.leaf_size + 32 - remainder

    pa_gpu_dst = self.pa.gpu
    pa_gpu_src = tree_src.pa.gpu
    dtype = ctype_to_dtype(self.c_type)

    def find_neighbors_for_partition(partition_cids, partition_size,
                                     partition_wgs, q=None):
        # The kernel is always requested with the full ``wgs``; only the
        # launch geometry changes between partitions.
        find_neighbors = self.helper.get_kernel(
            'find_neighbors', sorted=self.sorted, wgs=wgs
        )
        find_neighbors(
            partition_cids.dev,
            tree_src.pids.dev,
            self.pids.dev,
            self.cids.dev,
            tree_src.pbounds.dev,
            self.pbounds.dev,
            pa_gpu_src.x.dev,
            pa_gpu_src.y.dev,
            pa_gpu_src.z.dev,
            pa_gpu_src.h.dev,
            pa_gpu_dst.x.dev,
            pa_gpu_dst.y.dev,
            pa_gpu_dst.z.dev,
            pa_gpu_dst.h.dev,
            dtype(self.radius_scale),
            neighbor_cid_count.dev,
            neighbor_cids.dev,
            start_indices.dev,
            neighbors.dev,
            # One work-group of ``partition_wgs`` items per partition entry.
            gs=(partition_wgs * partition_size,),
            ls=(partition_wgs,),
            queue=(get_queue() if q is None else q),
        )

    if use_partitions and wgs > 32:
        wgs1 = 32 if wgs < 128 else 64

        m1, n1 = self.get_leaf_size_partitions(0, wgs1)
        fraction = n1 / int(self.unique_cid_count)

        # Splitting is only used when the first group is a sizeable share
        # of all leaves; otherwise fall through to the single launch.
        if fraction > 0.3:
            find_neighbors_for_partition(m1, n1, wgs1)
            m2, n2 = self.get_leaf_size_partitions(wgs1, wgs)
            assert (n1 + n2 == self.unique_cid_count)
            find_neighbors_for_partition(m2, n2, wgs)
            return

    # Unpartitioned path: taken when partitioning is disabled, when the
    # work-group size is at most 32, or when the threshold was not met.
    find_neighbors_for_partition(
        self.unique_cids, self.unique_cid_count, wgs)