def __init__(self, pa, dim=2, leaf_size=32, radius_scale=2.0,
             use_double=False, c_type='float'):
    """Build a point tree over the particles in *pa*.

    Parameters
    ----------
    pa : particle array
        Source particles; ``x``/``y``/``z``/``h`` properties are wrapped.
    dim : int
        Spatial dimension; must be 1, 2 or 3.
    leaf_size : int
        Maximum number of particles stored in a leaf node.
    radius_scale : float
        Multiplier applied to the particle ``h`` values when searching.
    use_double : bool
        True when the source particle data is double precision.
    c_type : str
        C scalar type used in the generated tree kernels.

    Raises
    ------
    ValueError
        If *dim* is not 1, 2 or 3.
    """
    super(PointTree, self).__init__(pa.get_number_of_particles(),
                                    2 ** dim, leaf_size)
    # Validate with an explicit raise: ``assert`` is stripped under -O.
    if not 1 <= dim <= 3:
        raise ValueError("dim must be 1, 2 or 3, got %r" % (dim,))
    self.max_depth = None
    self.dim = dim
    self.powdim = 2 ** self.dim
    self.xvars = ('x', 'y', 'z')[:dim]

    self.c_type = c_type
    self.c_type_src = 'double' if use_double else 'float'
    if use_double and c_type == 'float':
        # Extend the search radius a little to account for rounding
        # errors introduced by the double -> float conversion.
        radius_scale = radius_scale * (1 + 2e-7)

    # y and z coordinates need to be present for 1D and z for 2D.
    # This is because the NNPS implementation below assumes them to be
    # just set to 0.
    self.pa = ParticleArrayWrapper(pa, self.c_type_src, self.c_type,
                                   ('x', 'y', 'z', 'h'))

    self.radius_scale = radius_scale
    self.use_double = use_double

    self.helper = get_helper('tree/point_tree.mako', self.c_type)
    self.xmin = None
    self.xmax = None
    self.hmin = None
    self.make_vec = make_vec_dict[c_type][self.dim]
    self.ctx = get_context()
def update_minmax_cl(self, props, only_min=False, only_max=False):
    """Refresh the cached min/max of the device arrays named in *props*.

    Runs a single reduction kernel over all requested properties and
    stores the results on each property's ``minimum``/``maximum``
    attributes.  Only supported on the OpenCL backend.

    Parameters
    ----------
    props : list of str
        Names of properties (attributes of ``self``) to reduce.
    only_min : bool
        Compute only the minimum.
    only_max : bool
        Compute only the maximum.  At most one of *only_min* /
        *only_max* may be True.

    Raises
    ------
    ValueError
        If the backend is not 'opencl', or both flags are True.
    """
    if self.backend != 'opencl':
        raise ValueError('''Optimized minmax update only supported using opencl backend''')
    if only_min and only_max:
        raise ValueError("Only one of only_min and only_max can be True")

    dtype = 'double' if self.use_double else 'float'
    # Build a kernel name like "minmax_collector_floatx y ..." so that
    # each (operation, dtype, property-set) combination is cached
    # separately.
    op = 'min' if not only_max else ''
    op += 'max' if not only_min else ''
    name = "%s_collector_%s" % (op, ''.join([dtype] + props))

    from compyle.opencl import get_context
    ctx = get_context()

    # Custom struct dtype holding the running min/max for every prop.
    mmc_dtype, mmc_c_decl = make_collector_dtype(ctx.devices[0], self._dtype, props, name, only_min, only_max)
    knl = self._get_minmax_kernel(ctx, dtype, mmc_dtype, props, only_min, only_max, name, mmc_c_decl)

    args = [getattr(self, prop).dev for prop in props]
    result = knl(*args).get()

    # Copy the reduced values back onto the per-property containers.
    for prop in props:
        proparr = self._data[prop]
        if not only_max:
            proparr.minimum = result["cur_min_%s" % prop]
        if not only_min:
            proparr.maximum = result["cur_max_%s" % prop]
def __init__(self, acceleration_eval):
    """Gather array/type information and GPU state for *acceleration_eval*."""
    self.object = acceleration_eval
    self.backend = acceleration_eval.backend

    # Known types for every array referenced by the particle arrays,
    # plus the predefined precomputed symbols and the neighbor list.
    self.all_array_names = get_all_array_names(self.object.particle_arrays)
    self.known_types = get_known_types_for_arrays(self.all_array_names)
    add_address_space(self.known_types)
    self.known_types.update(
        dict(get_predefined_types(self.object.all_group.pre_comp))
    )
    self.known_types['NBRS'] = KnownType('GLOBAL_MEM unsigned int*')

    # GPU context/queue plus bookkeeping for the generated program.
    self._ctx = get_context(self.backend)
    self._queue = get_queue(self.backend)
    self.data = []
    self._array_map = None
    self._array_index = None
    self._equations = {}
    self._cpu_structs = {}
    self._gpu_structs = {}
    self.calls = []
    self.program = None
def get_context(backend):
    """Return a GPU context for *backend* ('cuda' or 'opencl').

    Raises ``RuntimeError`` for any other backend name.
    """
    if backend == 'opencl':
        from compyle.opencl import get_context
        return get_context()
    if backend == 'cuda':
        # Ensure compyle's CUDA context is initialized before pycuda's
        # autoinit context is imported.
        from compyle.cuda import set_context
        set_context()
        from pycuda.autoinit import context
        return context
    raise RuntimeError('Unsupported GPU backend %s' % backend)
def update_minmax_gpu(ary_list, only_min=False, only_max=False, backend=None):
    """Update the cached minimum/maximum of each GPU array in *ary_list*.

    A single reduction kernel is run over all arrays; results are stored
    on each array's ``minimum``/``maximum`` attributes.

    Parameters
    ----------
    ary_list : list
        Device arrays (all sharing one dtype) to reduce.
    only_min : bool
        Compute only the minimum.
    only_max : bool
        Compute only the maximum.  At most one flag may be True.
    backend : str or None
        'opencl' or 'cuda'; defaults to the backend of the first array.

    Raises
    ------
    ValueError
        If both flags are True, or the backend is unsupported.
    """
    if not backend:
        backend = ary_list[0].backend

    if only_min and only_max:
        raise ValueError("Only one of only_min and only_max can be True")

    props = ['ary_%s' % i for i in range(len(ary_list))]

    dtype = ary_list[0].dtype
    ctype = dtype_to_ctype(dtype)

    # Kernel name encodes operation + dtype + arity for caching.
    op = 'min' if not only_max else ''
    op += 'max' if not only_min else ''
    name = "%s_collector_%s" % (op, ''.join([ctype] + props))

    if backend == 'opencl':
        from compyle.opencl import get_context
        ctx = get_context()
        device = ctx.devices[0]
    elif backend == 'cuda':
        ctx = None
        device = None
    else:
        # Bug fix: previously an unsupported backend fell through and
        # crashed with UnboundLocalError on ``device`` below.
        raise ValueError("Unsupported backend: %s" % backend)

    mmc_dtype, mmc_c_decl = make_collector_dtype(device, dtype, props, name,
                                                 only_min, only_max, backend)
    # Initial accumulator value: the largest representable value of the
    # dtype, so every element compares smaller for the min pass.
    if np.issubdtype(dtype, np.floating):
        inf = np.finfo(dtype).max
    else:
        inf = np.iinfo(dtype).max

    knl = get_minmax_kernel(ctx, ctype, inf, mmc_dtype, props,
                            only_min, only_max, name, mmc_c_decl, backend)

    args = [ary.dev for ary in ary_list]
    result = knl(*args).get()

    for ary, prop in zip(ary_list, props):
        if not only_max:
            ary.minimum = result["cur_min_%s" % prop]
        if not only_min:
            ary.maximum = result["cur_max_%s" % prop]
def get_cl_sort_kernel(arg_types, ary_list):
    """Construct a PyOpenCL ``RadixSort`` kernel keyed on the first array.

    *arg_types* gives the known type of each array in *ary_list*; the
    generated kernel sorts all arrays together by ``ary_0``.
    """
    import pyopencl as cl
    from pyopencl.scan import GenericScanKernel
    import pyopencl.algorithm
    from compyle.opencl import get_context, get_queue

    arg_names = ["ary_%s" % i for i in range(len(ary_list))]
    # RadixSort wants plain OpenCL declarations, so translate the
    # GLOBAL_MEM macro into __global.
    declarations = [
        ("%s %s" % (knowntype_to_ctype(ktype), name)).replace(
            'GLOBAL_MEM', '__global')
        for ktype, name in zip(arg_types, arg_names)
    ]
    return cl.algorithm.RadixSort(
        get_context(), declarations,
        scan_kernel=GenericScanKernel,
        key_expr="ary_0[i]",
        sort_arg_names=arg_names
    )
def __init__(self, n, k=8, leaf_size=32):
    """Initialize the state for a GPU tree over *n* points.

    Parameters
    ----------
    n : int
        Number of points the tree indexes.
    k : int
        Branching factor (children per node).
    leaf_size : int
        Maximum number of points in a leaf node.
    """
    self.ctx = get_context()
    self.queue = get_queue()
    self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

    self.initialized = False
    self.preamble = ""
    self.leaf_size = leaf_size
    self.k = k
    self.n = n
    # Fix: ``self.sorted`` was redundantly assigned False twice.
    self.sorted = False
    self.depth = 0

    # Metadata describing the point -> cell index function; filled in
    # by ``set_index_function_info`` below.
    self.index_function_args = []
    self.index_function_arg_ctypes = []
    self.index_function_arg_dtypes = []
    self.index_function_consts = []
    self.index_function_const_ctypes = []
    self.index_code = ""

    self.set_index_function_info()
def _gpu_copy(self, pa_gpu):
    """Copy this wrapper's properties from *pa_gpu* on device.

    Uses a generated kernel that reads arrays of ``c_type_src`` and
    writes arrays of ``c_type`` (presumably converting precision when
    the two differ — verify against ``get_copy_kernel``).
    """
    knl = get_copy_kernel(get_context(), self.c_type_src,
                          self.c_type, self.varnames)
    sources = [getattr(pa_gpu, name).dev for name in self.varnames]
    targets = [getattr(self, name).dev for name in self.varnames]
    knl(*(sources + targets))
def get_simple_kernel(kernel_name, args, src, wgs, preamble=""):
    """Build a ``SimpleKernel`` and wrap it for OpenCL profiling."""
    kernel = SimpleKernel(get_context(), args, src, wgs,
                          kernel_name, preamble=preamble)
    return profile_kernel(kernel, kernel_name, backend='opencl')
def get_elwise_kernel(kernel_name, args, src, preamble=""):
    """Build a PyOpenCL ``ElementwiseKernel`` wrapped for profiling."""
    from pyopencl.elementwise import ElementwiseKernel
    kernel = ElementwiseKernel(get_context(), args, src,
                               kernel_name, preamble=preamble)
    return profile_kernel(kernel, kernel_name, backend='opencl')