Example #1
    def __init__(self, pa, dim=2, leaf_size=32, radius_scale=2.0,
                 use_double=False, c_type='float'):
        super(PointTree, self).__init__(pa.get_number_of_particles(), 2 ** dim,
                                        leaf_size)

        assert (1 <= dim <= 3)
        self.max_depth = None
        self.dim = dim
        self.powdim = 2 ** self.dim
        self.xvars = ('x', 'y', 'z')[:dim]

        self.c_type = c_type
        self.c_type_src = 'double' if use_double else 'float'

        if use_double and c_type == 'float':
            # Extend the search radius a little to account for rounding errors
            radius_scale = radius_scale * (1 + 2e-7)

        # The y and z coordinates must be present even in 1D (and z in 2D),
        # because the NNPS implementation below assumes they are simply set to 0.
        self.pa = ParticleArrayWrapper(pa, self.c_type_src,
                                       self.c_type, ('x', 'y', 'z', 'h'))

        self.radius_scale = radius_scale
        self.use_double = use_double

        self.helper = get_helper('tree/point_tree.mako', self.c_type)
        self.xmin = None
        self.xmax = None
        self.hmin = None
        self.make_vec = make_vec_dict[c_type][self.dim]
        self.ctx = get_context()
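A minimal construction sketch for the PointTree above, assuming a PySPH ParticleArray built with pysph.base.utils.get_particle_array and an available device context (the constructor calls get_context()); the particle data is purely illustrative.

import numpy as np
from pysph.base.utils import get_particle_array

# Hypothetical particle set: 1000 particles in 2D with a uniform smoothing
# length; get_particle_array also provides the z array the wrapper expects.
x, y = np.random.random((2, 1000))
pa = get_particle_array(name='fluid', x=x, y=y, h=np.full_like(x, 0.01))

tree = PointTree(pa, dim=2, leaf_size=32, radius_scale=2.0, use_double=False)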
Example #2
    def update_minmax_cl(self, props, only_min=False, only_max=False):
        if self.backend != 'opencl':
            raise ValueError('''Optimized minmax update only supported
                             using opencl backend''')
        if only_min and only_max:
            raise ValueError("Only one of only_min and only_max can be True")

        dtype = 'double' if self.use_double else 'float'
        op = 'min' if not only_max else ''
        op += 'max' if not only_min else ''
        name = "%s_collector_%s" % (op, ''.join([dtype] + props))

        from compyle.opencl import get_context
        ctx = get_context()

        mmc_dtype, mmc_c_decl = make_collector_dtype(ctx.devices[0],
                                                     self._dtype, props, name,
                                                     only_min, only_max)
        knl = self._get_minmax_kernel(ctx, dtype, mmc_dtype, props, only_min,
                                      only_max, name, mmc_c_decl)

        args = [getattr(self, prop).dev for prop in props]

        result = knl(*args).get()

        for prop in props:
            proparr = self._data[prop]
            if not only_max:
                proparr.minimum = result["cur_min_%s" % prop]
            if not only_min:
                proparr.maximum = result["cur_max_%s" % prop]
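A hedged call sketch based only on the signature above; `helper` stands for a hypothetical, already-initialized instance of the class defining update_minmax_cl, with device arrays for the named properties and the 'opencl' backend configured.

# `helper` is hypothetical; both calls rely only on the signature shown above.
helper.update_minmax_cl(['x', 'y', 'z', 'h'])   # update minima and maxima of all four props
helper.update_minmax_cl(['h'], only_min=True)   # compute only the minimum of 'h'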
Example #3
    def __init__(self, acceleration_eval):
        self.object = acceleration_eval
        self.backend = acceleration_eval.backend
        self.all_array_names = get_all_array_names(
            self.object.particle_arrays
        )
        self.known_types = get_known_types_for_arrays(
            self.all_array_names
        )
        add_address_space(self.known_types)
        predefined = dict(get_predefined_types(
            self.object.all_group.pre_comp
        ))
        self.known_types.update(predefined)
        self.known_types['NBRS'] = KnownType('GLOBAL_MEM unsigned int*')
        self.data = []
        self._ctx = get_context(self.backend)
        self._queue = get_queue(self.backend)
        self._array_map = None
        self._array_index = None
        self._equations = {}
        self._cpu_structs = {}
        self._gpu_structs = {}
        self.calls = []
        self.program = None
Example #4
def get_context(backend):
    if backend == 'cuda':
        from compyle.cuda import set_context
        set_context()
        from pycuda.autoinit import context
        return context
    elif backend == 'opencl':
        from compyle.opencl import get_context
        return get_context()
    else:
        raise RuntimeError('Unsupported GPU backend %s' % backend)
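An illustrative call of the dispatcher above, assuming a working PyOpenCL installation; with backend='cuda' a PyCUDA context is returned instead.

ctx = get_context('opencl')
print(ctx.devices[0].name)  # a pyopencl.Context exposes the devices it was created on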
Example #5
def update_minmax_gpu(ary_list, only_min=False, only_max=False,
                      backend=None):
    if not backend:
        backend = ary_list[0].backend

    if only_min and only_max:
        raise ValueError("Only one of only_min and only_max can be True")

    props = ['ary_%s' % i for i in range(len(ary_list))]

    dtype = ary_list[0].dtype
    ctype = dtype_to_ctype(dtype)

    op = 'min' if not only_max else ''
    op += 'max' if not only_min else ''
    name = "%s_collector_%s" % (op, ''.join([ctype] + props))

    if backend == 'opencl':
        from compyle.opencl import get_context
        ctx = get_context()
        device = ctx.devices[0]
    elif backend == 'cuda':
        ctx = None
        device = None

    mmc_dtype, mmc_c_decl = make_collector_dtype(device,
                                                 dtype, props, name,
                                                 only_min, only_max,
                                                 backend)

    if np.issubdtype(dtype, np.floating):
        inf = np.finfo(dtype).max
    else:
        inf = np.iinfo(dtype).max

    knl = get_minmax_kernel(ctx, ctype, inf, mmc_dtype, props,
                            only_min, only_max, name, mmc_c_decl,
                            backend)

    args = [ary.dev for ary in ary_list]

    result = knl(*args).get()

    for ary, prop in zip(ary_list, props):
        if not only_max:
            ary.minimum = result["cur_min_%s" % prop]
        if not only_min:
            ary.maximum = result["cur_max_%s" % prop]
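A usage sketch assuming compyle.array.wrap and an OpenCL device are available; update_minmax_gpu then fills the minimum and maximum attributes of the wrapped arrays, as the final loop above shows.

import numpy as np
from compyle.array import wrap

a = wrap(np.random.random(10000).astype(np.float32), backend='opencl')
b = wrap(np.random.random(10000).astype(np.float32), backend='opencl')

update_minmax_gpu([a, b])                # sets a.minimum, a.maximum, b.minimum, b.maximum
update_minmax_gpu([a], only_max=True)    # computes only a.maximum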
Example #6
def get_cl_sort_kernel(arg_types, ary_list):
    import pyopencl as cl
    from pyopencl.scan import GenericScanKernel
    import pyopencl.algorithm
    from compyle.opencl import get_context, get_queue
    arg_names = ["ary_%s" % i for i in range(len(ary_list))]

    sort_args = [
        "%s %s" % (knowntype_to_ctype(ktype), name)
        for ktype, name in zip(arg_types, arg_names)
    ]

    sort_args = [arg.replace('GLOBAL_MEM', '__global') for arg in sort_args]

    sort_knl = cl.algorithm.RadixSort(get_context(),
                                      sort_args,
                                      scan_kernel=GenericScanKernel,
                                      key_expr="ary_0[i]",
                                      sort_arg_names=arg_names)

    return sort_knl
Example #7
    def __init__(self, n, k=8, leaf_size=32):
        self.ctx = get_context()
        self.queue = get_queue()
        self.sorted = False
        self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

        self.initialized = False
        self.preamble = ""
        self.leaf_size = leaf_size
        self.k = k
        self.n = n
        self.depth = 0

        self.index_function_args = []
        self.index_function_arg_ctypes = []
        self.index_function_arg_dtypes = []
        self.index_function_consts = []
        self.index_function_const_ctypes = []
        self.index_code = ""

        self.set_index_function_info()
Example #8
    def _gpu_copy(self, pa_gpu):
        copy_kernel = get_copy_kernel(get_context(), self.c_type_src,
                                      self.c_type, self.varnames)
        args = [getattr(pa_gpu, v).dev for v in self.varnames]
        args += [getattr(self, v).dev for v in self.varnames]
        copy_kernel(*args)
Example #9
def get_simple_kernel(kernel_name, args, src, wgs, preamble=""):
    ctx = get_context()
    knl = SimpleKernel(ctx, args, src, wgs, kernel_name, preamble=preamble)

    return profile_kernel(knl, kernel_name, backend='opencl')
Example #10
def get_elwise_kernel(kernel_name, args, src, preamble=""):
    ctx = get_context()
    from pyopencl.elementwise import ElementwiseKernel
    knl = ElementwiseKernel(ctx, args, src, kernel_name, preamble=preamble)
    return profile_kernel(knl, kernel_name, backend='opencl')
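A sketch of using the returned kernel, assuming PyOpenCL device arrays and compyle's get_queue; the argument and body strings follow PyOpenCL's ElementwiseKernel conventions, and the 'axpb' kernel itself is hypothetical.

import numpy as np
import pyopencl.array as cla
from compyle.opencl import get_queue

q = get_queue()
x = cla.to_device(q, np.arange(10, dtype=np.float32))
y = cla.zeros(q, 10, np.float32)

# Hypothetical elementwise kernel computing y[i] = a * x[i] + b.
axpb = get_elwise_kernel('axpb', 'float *x, float *y, float a, float b',
                         'y[i] = a * x[i] + b')
axpb(x, y, np.float32(2.0), np.float32(3.0))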