Example #1
 def _create_temp_vars(self, temp_vars):
     n = self.n
     temp_vars['pids'] = Array(np.uint32, n=n, backend='opencl')
     for var, dtype in zip(self.index_function_args,
                           self.index_function_arg_dtypes):
         temp_vars[var] = Array(dtype, n=n, backend='opencl')
     temp_vars['cids'] = Array(np.uint32, n=n, backend='opencl')
Example #2
    def get_leaf_size_partitions(self, group_min, group_max):
        """Partition leaves based on leaf size

        Parameters
        ----------
        group_min
            Minimum leaf size
        group_max
            Maximum leaf size

        Returns
        -------
        groups : Array
            An array which contains the cell ids of leaves
            with leaf size > group_min and leaf size <= group_max
        group_count : int
            The number of leaves which satisfy the given condition
            on the leaf size
        """
        groups = Array(np.uint32,
                       n=int(self.unique_cid_count),
                       backend='opencl')
        group_count = Array(np.uint32, n=1, backend='opencl')

        get_cid_groups = _get_cid_groups_kernel(self.ctx)
        get_cid_groups(self.unique_cids.dev[:self.unique_cid_count],
                       self.pbounds.dev, groups.dev, group_count.dev,
                       np.int32(group_min), np.int32(group_max))
        result = groups, int(group_count.dev[0].get())
        return result
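
A hypothetical usage sketch of the partitioning above (the names here are assumed, not from the source); the bounds follow the documented group_min < size <= group_max convention:

    # Hypothetical usage; assumes `tree` is a built tree instance whose
    # unique_cid_count has already been computed.
    small, n_small = tree.get_leaf_size_partitions(0, 16)   # sizes 1..16
    large, n_large = tree.get_leaf_size_partitions(16, 32)  # sizes 17..32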
Example #3
 def _get_unique_cids_and_count(self):
     n = self.n
     self.unique_cids = Array(np.uint32, n=n, backend='opencl')
     self.unique_cids_map = Array(np.uint32, n=n, backend='opencl')
     uniq_count = Array(np.uint32, n=1, backend='opencl')
     unique_cids_kernel = _get_unique_cids_kernel(self.ctx)
     unique_cids_kernel(self.cids.dev, self.unique_cids_map.dev,
                        self.unique_cids.dev, uniq_count.dev)
     self.unique_cid_count = uniq_count.dev[0].get()
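
A plausible NumPy analogue of what the kernel computes, assuming cids is sorted as it is after a tree build (illustrative only; the device kernel is authoritative):

    import numpy as np

    cids = np.array([4, 4, 7, 9, 9], dtype=np.uint32)
    # unique_cids: the distinct cell ids; unique_cids_map: for each
    # particle, the index of its cell within unique_cids.
    unique_cids, unique_cids_map = np.unique(cids, return_inverse=True)
    unique_cid_count = len(unique_cids)  # -> 3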
Example #4
 def _add_prop_or_const(self, name, carray):
     """Add a new property or constant given the name and carray, note
     that this assumes that this property is already added to the
     particle array.
     """
     np_array = self._get_array(carray)
     g_ary = Array(np_array.dtype, n=carray.length, backend=self.backend)
     g_ary.set(np_array)
     self._data[name] = g_ary
     setattr(self, name, g_ary)
Example #5
    def get_leaves(self):
        leaves = Array(np.uint32,
                       n=self.offsets.dev.shape[0],
                       backend='opencl')
        num_leaves = Array(np.uint32, n=1, backend='opencl')
        leaves_kernel = _get_leaves_kernel(self.ctx, self.leaf_size)
        leaves_kernel(self.offsets.dev, self.pbounds.dev, leaves.dev,
                      num_leaves.dev)

        num_leaves = num_leaves.dev[0].get()
        return leaves.dev[:num_leaves], num_leaves
Example #6
    def append_parray(self, parray, align=True, update_constants=False):
        """ Add particles from a particle array

        Properties that are not present in self will be added.
        """
        if parray.gpu is None:
            parray.set_device_helper(DeviceHelper(parray))

        if parray.gpu.get_number_of_particles() == 0:
            return

        num_extra_particles = parray.gpu.get_number_of_particles()
        old_num_particles = self.get_number_of_particles()
        new_num_particles = num_extra_particles + old_num_particles

        # extend current arrays by the required number of particles
        self.extend(num_extra_particles)

        my_stride = self._particle_array.stride
        for prop_name in parray.gpu.properties:
            stride = parray.stride.get(prop_name, 1)
            if stride > 1 and prop_name not in my_stride:
                my_stride[prop_name] = stride
            if prop_name in self.properties:
                arr = self._data[prop_name]
                source = parray.gpu.get_device_array(prop_name)
                arr.dev[old_num_particles * stride:] = source.dev
            else:
                # This property is not present in self.
                dtype = parray.gpu.get_device_array(prop_name).dtype
                arr = Array(dtype,
                            n=new_num_particles * stride,
                            backend=self.backend)
                arr.fill(parray.default_values[prop_name])
                self.update_prop(prop_name, arr)

                # now add the values to the end of the created array
                dest = self._data[prop_name]
                source = parray.gpu.get_device_array(prop_name)
                dest.dev[old_num_particles * stride:] = source.dev

        if update_constants:
            for const in parray.gpu.constants:
                if const not in self.constants:
                    arr = parray.gpu.get_device_array(const)
                    self.update_const(const, arr.copy())

        if num_extra_particles > 0 and align:
            self.align_particles()
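
The stride arithmetic above can be pictured with a small NumPy analogue (illustrative only): a property with stride s stores s contiguous values per particle, so the appended data starts at index old_num_particles * s.

    import numpy as np

    stride = 3
    old_num_particles = 2
    dest = np.zeros(3 * stride)                 # room for 3 particles
    source = np.array([7.0, 8.0, 9.0])          # one appended particle
    dest[old_num_particles * stride:] = source  # fills slots 6, 7, 8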
Example #7
    def _initialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids.fill(0)

        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            setattr(self, var, Array(dtype, n=num_particles, backend='opencl'))

        # Filled after tree built
        self.pbounds = None
        self.offsets = None
        self.initialized = True
Example #8
    def _merge_layers(self, offsets_temp, pbounds_temp):
        curr_offset = 0
        total_nodes = 0

        for i in range(self.depth + 1):
            total_nodes += self.num_nodes[i]

        self.offsets = Array(np.int32, n=total_nodes, backend='opencl')
        self.pbounds = Array(cl.cltypes.uint2, n=total_nodes, backend='opencl')

        append_layer = self.main_helper.get_kernel('append_layer')

        self.total_nodes = total_nodes
        for i in range(self.depth + 1):
            append_layer(offsets_temp[i].dev, pbounds_temp[i].dev,
                         self.offsets.dev, self.pbounds.dev,
                         np.int32(curr_offset), np.uint8(i == self.depth))
            curr_offset += self.num_nodes[i]
Example #9
    def _remove_particles_bool(self, if_remove, align=True):
        """ Remove particle i if if_remove[i] is True
        """
        num_indices = int(array.sum(if_remove, backend=self.backend))

        if num_indices == 0:
            return

        num_particles = self.get_number_of_particles()
        new_indices = Array(np.uint32,
                            n=(num_particles - num_indices),
                            backend=self.backend)
        num_removed_particles = array.empty(1,
                                            dtype=np.int32,
                                            backend=self.backend)

        remove_knl, stride_knl = self._get_remove_particles_bool_kernels()

        remove_knl(if_remove=if_remove,
                   new_indices=new_indices,
                   num_removed_particles=num_removed_particles,
                   num_particles=num_particles)

        new_num_particles = num_particles - int(num_removed_particles.get())

        strides = set(self._particle_array.stride.values())
        s_indices = {1: new_indices}
        for stride in strides:
            if stride == 1:
                continue
            size = new_num_particles * stride
            s_index = Array(np.uint32, n=size, backend=self.backend)
            stride_knl(new_indices, s_index, size, stride)
            s_indices[stride] = s_index

        for prop in self.properties:
            stride = self._particle_array.stride.get(prop, 1)
            s_index = s_indices[stride]
            self._data[prop].align(s_index)
            setattr(self, prop, self._data[prop])

        if align:
            self.align_particles()
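
The index bookkeeping above has a direct NumPy analogue (illustrative only; the real kernels run on the device, and their exact semantics are assumed here):

    import numpy as np

    # if_remove[i] == 1 marks particle i for removal.
    if_remove = np.array([0, 1, 0, 0, 1], dtype=np.int32)
    new_indices = np.flatnonzero(if_remove == 0)  # -> [0, 2, 3]
    # For a property with stride 2, surviving particle i keeps slots
    # [2*i, 2*i + 1]; expanding new_indices this way mirrors stride_knl.
    s_index = (2 * new_indices[:, None] + np.arange(2)).ravel()
    # -> [0, 1, 4, 5, 6, 7]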
Example #10
    def find_neighbor_cids(self, tree_src):
        neighbor_cid_count = Array(np.uint32, n=self.unique_cid_count + 1,
                                   backend='opencl')
        find_neighbor_cid_counts = self._leaf_neighbor_operation(
            tree_src,
            args="uint2 *pbounds, int *cnt",
            setup="int count=0",
            operation="""
                    if (pbounds[cid_src].s0 < pbounds[cid_src].s1)
                        count++;
                    """,
            output_expr="cnt[i] = count;"
        )
        find_neighbor_cid_counts = profile_kernel(
            find_neighbor_cid_counts, 'find_neighbor_cid_count',
            backend='opencl'
        )
        find_neighbor_cid_counts(tree_src.pbounds.dev,
                                 neighbor_cid_count.dev)

        neighbor_psum = _get_neighbor_count_prefix_sum_kernel(self.ctx)
        neighbor_psum(neighbor_cid_count.dev)

        total_neighbors = int(neighbor_cid_count.dev[-1].get())
        neighbor_cids = Array(np.uint32, n=total_neighbors,
                              backend='opencl')

        find_neighbor_cids = self._leaf_neighbor_operation(
            tree_src,
            args="uint2 *pbounds, int *cnt, int *neighbor_cids",
            setup="int offset=cnt[i];",
            operation="""
            if (pbounds[cid_src].s0 < pbounds[cid_src].s1)
                neighbor_cids[offset++] = cid_src;
            """,
            output_expr=""
        )
        find_neighbor_cids = profile_kernel(
            find_neighbor_cids, 'find_neighbor_cids', backend='opencl')
        find_neighbor_cids(tree_src.pbounds.dev,
                           neighbor_cid_count.dev, neighbor_cids.dev)
        return neighbor_cid_count, neighbor_cids
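
The two kernels above follow the usual count / prefix-sum / fill idiom. In NumPy terms (illustrative only):

    import numpy as np

    counts = np.array([2, 0, 3])                        # neighbors per leaf
    offsets = np.concatenate(([0], np.cumsum(counts)))  # exclusive prefix sum
    total = int(offsets[-1])                            # 5 entries overall
    out = np.empty(total, dtype=np.uint32)              # the second pass has
    # leaf i write its neighbor cids starting at out[offsets[i]].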
Example #11
    def remove_particles(self, indices):
        """ Remove particles whose indices are given in index_list.

        We repeatedly interchange the values of the last element and
        values from the index_list and reduce the size of the array
        by one. This is done for every property that is being maintained.

        Parameters
        ----------

        indices : array
            an array of indices; this may be a list, numpy array,
            or a LongArray.

        Notes
        -----

        Pseudo-code for the implementation::

            if index_list.length > number of particles
                raise ValueError

            sorted_indices <- index_list sorted in ascending order.

            for every array in property_array
                array.remove(sorted_indices)

        """
        if len(indices) > self.get_number_of_particles():
            msg = 'Number of particles to be removed is greater than'
            msg += ' number of particles in array'
            raise ValueError(msg)

        num_particles = self.get_number_of_particles()
        if_remove = Array(np.int32, n=num_particles, backend=self.backend)
        if_remove.fill(0)

        fill_if_remove_knl = self._get_remove_particles_kernel()
        fill_if_remove_knl(indices, if_remove, num_particles)

        self._remove_particles_bool(if_remove)
Example #12
    def remove_particles(self, indices, align=True):
        """ Remove particles whose indices are given in index_list.

        Parameters
        ----------

        indices : array
            an array of indices; this may be a list, numpy array,
            or a LongArray.

        """
        if len(indices) > self.get_number_of_particles():
            msg = 'Number of particles to be removed is greater than'
            msg += ' number of particles in array'
            raise ValueError(msg)

        num_particles = self.get_number_of_particles()
        if_remove = Array(np.int32, n=num_particles, backend=self.backend)
        if_remove.fill(0)

        fill_if_remove_knl = self._get_remove_particles_kernel()
        fill_if_remove_knl(indices, if_remove, num_particles)

        self._remove_particles_bool(if_remove, align=align)
Example #13
    def _update_node_data(self, offsets_prev, pbounds_prev, offsets, pbounds,
                          seg_flag, child_count_prefix_sum, csum_nodes,
                          csum_nodes_next, n):
        """Update node data and return number of children which are leaves."""

        # Update particle-related data of children
        set_node_data = self.main_helper.get_kernel("set_node_data", k=self.k)
        set_node_data(offsets_prev.dev, pbounds_prev.dev, offsets.dev,
                      pbounds.dev, seg_flag.dev, child_count_prefix_sum.dev,
                      np.uint32(csum_nodes), np.uint32(n))

        # Set children offsets
        leaf_count = Array(np.uint32, n=1, backend='opencl')
        set_offsets = _get_set_offset_kernel(self.ctx, self.k, self.leaf_size)
        set_offsets(pbounds.dev, offsets.dev, leaf_count.dev,
                    np.uint32(csum_nodes_next))
        return leaf_count.dev[0].get()
Example #14
    def _create_ghosts_periodic(self):
        """Identify boundary particles and create images.

        We need to find all particles that are within a specified
        distance from the boundaries and place image copies on the
        other side of the boundary. Corner reflections need to be
        accounted for when using domains with multiple periodicity.

        The periodic domain is specified using the DomainManager object.

        """
        copy_props = self.copy_props
        pa_wrappers = self.pa_wrappers
        narrays = self.narrays

        # Cell size used to check for periodic ghosts. For summation-density
        # like operations we need to create two layers of ghost images; this
        # is configurable via the n_layers argument to the constructor.
        cell_size = self.n_layers * self.cell_size

        # periodic domain values
        xmin, xmax = self.xmin, self.xmax
        ymin, ymax = self.ymin, self.ymax
        zmin, zmax = self.zmin, self.zmax

        xtranslate = self.xtranslate
        ytranslate = self.ytranslate
        ztranslate = self.ztranslate

        # periodicity flags
        periodic_in_x = self.periodic_in_x
        periodic_in_y = self.periodic_in_y
        periodic_in_z = self.periodic_in_z

        reduce_knl = self._get_ghosts_reduction_kernel()
        scan_knl = self._get_ghosts_scan_kernel()
        translate_knl = self._get_translate_kernel()

        if not self.ghosts:
            self.ghosts = [
                paw.pa.empty_clone(props=copy_props[i])
                for i, paw in enumerate(pa_wrappers)
            ]
        else:
            for ghost_pa in self.ghosts:
                ghost_pa.resize(0)
            for i in range(narrays):
                self.ghosts[i].ensure_properties(pa_wrappers[i].pa,
                                                 props=copy_props[i])

        for i, pa_wrapper in enumerate(self.pa_wrappers):
            ghost_pa = self.ghosts[i]

            x = pa_wrapper.pa.gpu.x
            y = pa_wrapper.pa.gpu.y
            z = pa_wrapper.pa.gpu.z

            num_extra_particles = reduce_knl(x, y, z, xmin, ymin, zmin, xmax,
                                             ymax, zmax, cell_size,
                                             periodic_in_x, periodic_in_y,
                                             periodic_in_z)

            num_extra_particles = int(num_extra_particles)

            indices = Array(np.int32, n=num_extra_particles)
            masks = Array(np.int32, n=num_extra_particles)

            scan_knl(periodic_in_x=periodic_in_x,
                     periodic_in_y=periodic_in_y,
                     periodic_in_z=periodic_in_z,
                     x=x,
                     y=y,
                     z=z,
                     xmin=xmin,
                     ymin=ymin,
                     zmin=zmin,
                     xmax=xmax,
                     ymax=ymax,
                     zmax=zmax,
                     cell_size=cell_size,
                     masks=masks,
                     indices=indices)

            pa_wrapper.pa.extract_particles(indices,
                                            ghost_pa,
                                            align=False,
                                            props=copy_props[i])

            translate_knl(ghost_pa.gpu.x, ghost_pa.gpu.y, ghost_pa.gpu.z,
                          ghost_pa.gpu.tag, xtranslate, ytranslate, ztranslate,
                          masks)

            pa_wrapper.pa.append_parray(ghost_pa, align=False)
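
For a single periodic axis, the masking and translation steps amount to the following NumPy picture (illustrative; the real work is done by the reduction, scan, and translate kernels above):

    import numpy as np

    xmin, xmax = 0.0, 1.0
    xtranslate = xmax - xmin
    cell_size = 0.1
    x = np.array([0.05, 0.5, 0.97])

    # Particles near the left boundary get images shifted past xmax,
    # and vice versa for the right boundary.
    left_images = x[x < xmin + cell_size] + xtranslate   # -> [1.05]
    right_images = x[x > xmax - cell_size] - xtranslate  # -> [-0.03]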
Example #15
 def _allocate_memory(self, pa_gpu):
     shape = getattr(pa_gpu, self.varnames[0]).dev.shape[0]
     for v in self.varnames:
         setattr(
             self, v,
             Array(ctype_to_dtype(self.c_type), n=shape, backend='opencl'))
Example #16
class Tree(object):
    """k-ary Tree
    """
    def __init__(self, n, k=8, leaf_size=32):
        self.ctx = get_context()
        self.queue = get_queue()
        self.sorted = False
        self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

        self.initialized = False
        self.preamble = ""
        self.leaf_size = leaf_size
        self.k = k
        self.n = n
        self.depth = 0

        self.index_function_args = []
        self.index_function_arg_ctypes = []
        self.index_function_arg_dtypes = []
        self.index_function_consts = []
        self.index_function_const_ctypes = []
        self.index_code = ""

        self.set_index_function_info()

    def set_index_function_info(self):
        raise NotImplementedError

    def get_data_args(self):
        return [getattr(self, v) for v in self.index_function_args]

    def get_index_constants(self, depth):
        raise NotImplementedError

    def _initialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids.fill(0)

        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            setattr(self, var, Array(dtype, n=num_particles, backend='opencl'))

        # Filled after tree built
        self.pbounds = None
        self.offsets = None
        self.initialized = True

    def _reinitialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids.resize(num_particles)
        self.cids.resize(num_particles)
        self.cids.fill(0)

        for var in self.index_function_args:
            getattr(self, var).resize(num_particles)

        # Filled after tree built
        self.pbounds = None
        self.offsets = None

    def _setup_build(self):
        if not self.initialized:
            self._initialize_data()
        else:
            self._reinitialize_data()

    def _build(self, fixed_depth=None):
        self._build_tree(fixed_depth)

    ###########################################################################
    # Core construction algorithm and helper functions
    ###########################################################################

    # A little bit of manual book-keeping for temporary variables.
    # More specifically, these temporary variables would otherwise be thrown
    # away after building each layer of the tree.
    # We could instead just allocate new arrays after building each layer
    # and let the GC take care of them, but this is likely a better
    # approach to save on memory.
    def _create_temp_vars(self, temp_vars):
        n = self.n
        temp_vars['pids'] = Array(np.uint32, n=n, backend='opencl')
        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            temp_vars[var] = Array(dtype, n=n, backend='opencl')
        temp_vars['cids'] = Array(np.uint32, n=n, backend='opencl')

    def _exchange_temp_vars(self, temp_vars):
        for k in temp_vars.keys():
            t = temp_vars[k]
            temp_vars[k] = getattr(self, k)
            setattr(self, k, t)

    def _clean_temp_vars(self, temp_vars):
        for k in list(temp_vars.keys()):
            del temp_vars[k]

    def _get_temp_data_args(self, temp_vars):
        result = [temp_vars[v] for v in self.index_function_args]
        return result

    def _reorder_particles(self, depth, child_count_prefix_sum, offsets_parent,
                           pbounds_parent, seg_flag, csum_nodes_prev,
                           temp_vars):
        # Scan

        args = [('__global ' + ctype + ' *' + v) for v, ctype in zip(
            self.index_function_args, self.index_function_arg_ctypes)]
        args += [(ctype + ' ' + v) for v, ctype in zip(
            self.index_function_consts, self.index_function_const_ctypes)]
        args = ', '.join(args)

        particle_kernel = _get_particle_kernel(self.ctx, self.k, args,
                                               self.index_code)
        args = [seg_flag.dev, child_count_prefix_sum.dev]
        args += [x.dev for x in self.get_data_args()]
        args += self.get_index_constants(depth)
        particle_kernel(*args)

        # Reorder particles
        reorder_particles = self.main_helper.get_kernel(
            'reorder_particles',
            k=self.k,
            data_vars=tuple(self.index_function_args),
            data_var_ctypes=tuple(self.index_function_arg_ctypes),
            const_vars=tuple(self.index_function_consts),
            const_var_ctypes=tuple(self.index_function_const_ctypes),
            index_code=self.index_code)

        args = [
            self.pids.dev, self.cids.dev, seg_flag.dev, pbounds_parent.dev,
            offsets_parent.dev, child_count_prefix_sum.dev,
            temp_vars['pids'].dev, temp_vars['cids'].dev
        ]
        args += [x.dev for x in self.get_data_args()]
        args += [x.dev for x in self._get_temp_data_args(temp_vars)]
        args += self.get_index_constants(depth)
        args += [np.uint32(csum_nodes_prev)]

        reorder_particles(*args)
        self._exchange_temp_vars(temp_vars)

    def _merge_layers(self, offsets_temp, pbounds_temp):
        curr_offset = 0
        total_nodes = 0

        for i in range(self.depth + 1):
            total_nodes += self.num_nodes[i]

        self.offsets = Array(np.int32, n=total_nodes, backend='opencl')
        self.pbounds = Array(cl.cltypes.uint2, n=total_nodes, backend='opencl')

        append_layer = self.main_helper.get_kernel('append_layer')

        self.total_nodes = total_nodes
        for i in range(self.depth + 1):
            append_layer(offsets_temp[i].dev, pbounds_temp[i].dev,
                         self.offsets.dev, self.pbounds.dev,
                         np.int32(curr_offset), np.uint8(i == self.depth))
            curr_offset += self.num_nodes[i]

    def _update_node_data(self, offsets_prev, pbounds_prev, offsets, pbounds,
                          seg_flag, child_count_prefix_sum, csum_nodes,
                          csum_nodes_next, n):
        """Update node data and return number of children which are leaves."""

        # Update particle-related data of children
        set_node_data = self.main_helper.get_kernel("set_node_data", k=self.k)
        set_node_data(offsets_prev.dev, pbounds_prev.dev, offsets.dev,
                      pbounds.dev, seg_flag.dev, child_count_prefix_sum.dev,
                      np.uint32(csum_nodes), np.uint32(n))

        # Set children offsets
        leaf_count = Array(np.uint32, n=1, backend='opencl')
        set_offsets = _get_set_offset_kernel(self.ctx, self.k, self.leaf_size)
        set_offsets(pbounds.dev, offsets.dev, leaf_count.dev,
                    np.uint32(csum_nodes_next))
        return leaf_count.dev[0].get()

    def _build_tree(self, fixed_depth=None):
        # We build the tree one layer at a time. We stop building new
        # layers once either all nodes are leaves or the target depth
        # (fixed_depth) is reached.
        # At this point, the information for each layer is segmented / not
        # contiguous in memory, and so we run a merge_layers procedure to
        # move the data for all layers into a single array.
        #
        # The procedure for building each layer can be split up as follows
        # 1) Determine which child each particle is going to belong to in the
        #    next layer
        # 2) Perform a kind of segmented scan over this. This gives us the
        #    new order of the particles so that consecutive particles lie in
        #    the same child
        # 3) Reorder the particles based on this order
        # 4) Create a new layer and set the node data for the new layer. We
        #    get to know which particles belong to each node directly from the
        #    results of step 2
        # 5) Set the predicted offsets of the children of the nodes in the
        #    new layer. If a node has fewer than leaf_size particles, it's a
        #    leaf. A kind of prefix sum over this directly lets us know the
        #    predicted offsets.
        # Rinse and repeat for building more layers.
        #
        # Note that after building the last layer, the predicted offsets for
        # the children might not be correct since we're not going to build
        # more layers. The _merge_layers procedure sets the offsets in the
        # last layer to -1 to correct this.

        num_leaves_here = 0
        n = self.n
        temp_vars = {}

        self.depth = 0
        self.num_nodes = [1]

        # Cumulative sum of nodes in the previous layers
        csum_nodes_prev = 0
        csum_nodes = 1

        # Initialize temporary data (but persistent across layers)
        self._create_temp_vars(temp_vars)

        child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                       n=n,
                                       backend='opencl')

        seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
        seg_flag.fill(0)
        seg_flag.dev[0] = 1

        offsets_temp = [Array(np.int32, n=1, backend='opencl')]
        offsets_temp[-1].fill(1)

        pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
        pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

        # FIXME: Depths above 20 possible and feasible for binary / quad trees
        loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

        for depth in range(1, loop_lim):
            num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
            if num_nodes == 0:
                break
            else:
                self.depth += 1
            self.num_nodes.append(num_nodes)

            # Allocate new layer
            offsets_temp.append(
                Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
            pbounds_temp.append(
                Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                      backend='opencl'))

            # Generate particle index and reorder the particles
            self._reorder_particles(depth, child_count_prefix_sum,
                                    offsets_temp[-2], pbounds_temp[-2],
                                    seg_flag, csum_nodes_prev, temp_vars)

            num_leaves_here = self._update_node_data(
                offsets_temp[-2], pbounds_temp[-2], offsets_temp[-1],
                pbounds_temp[-1], seg_flag, child_count_prefix_sum, csum_nodes,
                csum_nodes + self.num_nodes[-1], n)

            csum_nodes_prev = csum_nodes
            csum_nodes += self.num_nodes[-1]

        self._merge_layers(offsets_temp, pbounds_temp)
        self._clean_temp_vars(temp_vars)

    ###########################################################################
    # Misc
    ###########################################################################

    def _get_unique_cids_and_count(self):
        n = self.n
        self.unique_cids = Array(np.uint32, n=n, backend='opencl')
        self.unique_cids_map = Array(np.uint32, n=n, backend='opencl')
        uniq_count = Array(np.uint32, n=1, backend='opencl')
        unique_cids_kernel = _get_unique_cids_kernel(self.ctx)
        unique_cids_kernel(self.cids.dev, self.unique_cids_map.dev,
                           self.unique_cids.dev, uniq_count.dev)
        self.unique_cid_count = uniq_count.dev[0].get()

    def get_leaves(self):
        leaves = Array(np.uint32,
                       n=self.offsets.dev.shape[0],
                       backend='opencl')
        num_leaves = Array(np.uint32, n=1, backend='opencl')
        leaves_kernel = _get_leaves_kernel(self.ctx, self.leaf_size)
        leaves_kernel(self.offsets.dev, self.pbounds.dev, leaves.dev,
                      num_leaves.dev)

        num_leaves = num_leaves.dev[0].get()
        return leaves.dev[:num_leaves], num_leaves

    def _sort(self):
        """Set tree as being sorted

        The particle array needs to be aligned by the caller!
        """
        if not self.sorted:
            self.sorted = True

    ###########################################################################
    # Tree API
    ###########################################################################
    def allocate_node_prop(self, dtype):
        return Array(dtype, n=self.total_nodes, backend='opencl')

    def allocate_leaf_prop(self, dtype):
        return Array(dtype, n=int(self.unique_cid_count), backend='opencl')

    def get_preamble(self):
        if self.sorted:
            return "#define PID(idx) (idx)"
        else:
            return "#define PID(idx) (pids[idx])"

    def get_leaf_size_partitions(self, group_min, group_max):
        """Partition leaves based on leaf size

        Parameters
        ----------
        group_min
            Minimum leaf size
        group_max
            Maximum leaf size

        Returns
        -------
        groups : Array
            An array which contains the cell ids of leaves
            with leaf size > group_min and leaf size <= group_max
        group_count : int
            The number of leaves which satisfy the given condition
            on the leaf size
        """
        groups = Array(np.uint32,
                       n=int(self.unique_cid_count),
                       backend='opencl')
        group_count = Array(np.uint32, n=1, backend='opencl')

        get_cid_groups = _get_cid_groups_kernel(self.ctx)
        get_cid_groups(self.unique_cids.dev[:self.unique_cid_count],
                       self.pbounds.dev, groups.dev, group_count.dev,
                       np.int32(group_min), np.int32(group_max))
        result = groups, int(group_count.dev[0].get())
        return result

    def tree_bottom_up(self,
                       args,
                       setup,
                       leaf_operation,
                       node_operation,
                       output_expr,
                       preamble=""):
        return tree_bottom_up(self.ctx, args, setup, leaf_operation,
                              node_operation, output_expr, preamble)

    def leaf_tree_traverse(self,
                           args,
                           setup,
                           node_operation,
                           leaf_operation,
                           output_expr,
                           common_operation="",
                           preamble=""):
        """
        Traverse this (source) tree. One thread is launched per leaf of
        the destination tree.
        """

        return leaf_tree_traverse(self.ctx, self.k, args, setup,
                                  node_operation, leaf_operation, output_expr,
                                  common_operation, preamble)

    def point_tree_traverse(self,
                            args,
                            setup,
                            node_operation,
                            leaf_operation,
                            output_expr,
                            common_operation="",
                            preamble=""):
        """
        Traverse this (source) tree. One thread is launched per particle
        of the destination tree.
        """

        return point_tree_traverse(self.ctx, self.k, args, setup,
                                   node_operation, leaf_operation, output_expr,
                                   common_operation, preamble)
Example #17
    def _build_tree(self, fixed_depth=None):
        # We build the tree one layer at a time. We stop building new
        # layers once either all nodes are leaves or the target depth
        # (fixed_depth) is reached.
        # At this point, the information for each layer is segmented / not
        # contiguous in memory, and so we run a merge_layers procedure to
        # move the data for all layers into a single array.
        #
        # The procedure for building each layer can be split up as follows
        # 1) Determine which child each particle is going to belong to in the
        #    next layer
        # 2) Perform a kind of segmented scan over this. This gives us the
        #    new order of the particles so that consecutive particles lie in
        #    the same child
        # 3) Reorder the particles based on this order
        # 4) Create a new layer and set the node data for the new layer. We
        #    get to know which particles belong to each node directly from the
        #    results of step 2
        # 5) Set the predicted offsets of the children of the nodes in the
        #    new layer. If a node has fewer than leaf_size particles, it's a
        #    leaf. A kind of prefix sum over this directly lets us know the
        #    predicted offsets.
        # Rinse and repeat for building more layers.
        #
        # Note that after building the last layer, the predicted offsets for
        # the children might not be correct since we're not going to build
        # more layers. The _merge_layers procedure sets the offsets in the
        # last layer to -1 to correct this.

        num_leaves_here = 0
        n = self.n
        temp_vars = {}

        self.depth = 0
        self.num_nodes = [1]

        # Cumulative sum of nodes in the previous layers
        csum_nodes_prev = 0
        csum_nodes = 1

        # Initialize temporary data (but persistent across layers)
        self._create_temp_vars(temp_vars)

        child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                       n=n,
                                       backend='opencl')

        seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
        seg_flag.fill(0)
        seg_flag.dev[0] = 1

        offsets_temp = [Array(np.int32, n=1, backend='opencl')]
        offsets_temp[-1].fill(1)

        pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
        pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

        # FIXME: Depths above 20 possible and feasible for binary / quad trees
        loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

        for depth in range(1, loop_lim):
            num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
            if num_nodes == 0:
                break
            else:
                self.depth += 1
            self.num_nodes.append(num_nodes)

            # Allocate new layer
            offsets_temp.append(
                Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
            pbounds_temp.append(
                Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                      backend='opencl'))

            # Generate particle index and reorder the particles
            self._reorder_particles(depth, child_count_prefix_sum,
                                    offsets_temp[-2], pbounds_temp[-2],
                                    seg_flag, csum_nodes_prev, temp_vars)

            num_leaves_here = self._update_node_data(
                offsets_temp[-2], pbounds_temp[-2], offsets_temp[-1],
                pbounds_temp[-1], seg_flag, child_count_prefix_sum, csum_nodes,
                csum_nodes + self.num_nodes[-1], n)

            csum_nodes_prev = csum_nodes
            csum_nodes += self.num_nodes[-1]

        self._merge_layers(offsets_temp, pbounds_temp)
        self._clean_temp_vars(temp_vars)
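
Each new layer in the loop above is sized by the recurrence num_nodes[i+1] = k * (num_nodes[i] - leaves in layer i). A quick sketch with made-up leaf counts:

    # Illustrative layer growth for k = 8 with hypothetical leaf counts.
    k = 8
    num_nodes = [1]            # root layer
    leaves_per_layer = [0, 3]  # assumed number of leaves in layers 0 and 1
    for nl in leaves_per_layer:
        num_nodes.append(k * (num_nodes[-1] - nl))
    # num_nodes -> [1, 8, 40]; total_nodes = 1 + 8 + 40 = 49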
Example #18
 def allocate_node_prop(self, dtype):
     return Array(dtype, n=self.total_nodes, backend='opencl')
Example #19
 def allocate_leaf_prop(self, dtype):
     return Array(dtype, n=int(self.unique_cid_count), backend='opencl')