Example #1
    def append_parray(self, parray, align=True, update_constants=False):
        """ Add particles from a particle array

        properties that are not there in self will be added
        """
        if parray.gpu is None:
            parray.set_device_helper(DeviceHelper(parray))

        if parray.gpu.get_number_of_particles() == 0:
            return

        num_extra_particles = parray.gpu.get_number_of_particles()
        old_num_particles = self.get_number_of_particles()
        new_num_particles = num_extra_particles + old_num_particles

        # extend current arrays by the required number of particles
        self.extend(num_extra_particles)

        my_stride = self._particle_array.stride
        for prop_name in parray.gpu.properties:
            stride = parray.stride.get(prop_name, 1)
            if stride > 1 and prop_name not in my_stride:
                my_stride[prop_name] = stride
            if prop_name in self.properties:
                arr = self._data[prop_name]
                source = parray.gpu.get_device_array(prop_name)
                arr.dev[old_num_particles * stride:] = source.dev
            else:
                # This property is not present in self; create it and
                # fill it with the default value.
                dtype = parray.gpu.get_device_array(prop_name).dtype
                arr = Array(dtype, n=new_num_particles * stride,
                            backend=self.backend)
                arr.fill(parray.default_values[prop_name])
                self.update_prop(prop_name, arr)

                # now add the values to the end of the created array
                dest = self._data[prop_name]
                source = parray.gpu.get_device_array(prop_name)
                dest.dev[old_num_particles * stride:] = source.dev

        if update_constants:
            for const in parray.gpu.constants:
                if const not in self.constants:
                    arr = parray.gpu.get_device_array(const)
                    self.update_const(const, arr.copy())

        if num_extra_particles > 0 and align:
            self.align_particles()
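
For context, here is a minimal host-side usage sketch of append_parray. The import paths and the get_particle_array helper are assumptions based on PySPH's public API, not part of the example above.

    # Usage sketch (assumed PySPH import paths; adapt to your install).
    import numpy as np
    from pysph.base.utils import get_particle_array
    from pysph.base.device_helper import DeviceHelper

    pa_a = get_particle_array(name='a', x=np.array([0.0, 1.0, 2.0]))
    pa_b = get_particle_array(name='b', x=np.array([3.0, 4.0]))
    pa_a.set_device_helper(DeviceHelper(pa_a))

    # append_parray wraps pa_b in a DeviceHelper itself if needed, then
    # copies b's particles to the end of a's device arrays; properties
    # present in b but missing from a are created and filled with b's
    # default values.
    pa_a.gpu.append_parray(pa_b)
    assert pa_a.gpu.get_number_of_particles() == 5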
Example #2
    def remove_particles(self, indices):
        """ Remove particles whose indices are given in index_list.

        We repeatedly interchange the values of the last element and
        values from the index_list and reduce the size of the array
        by one. This is done for every property that is being maintained.

        Parameters
        ----------

        indices : array
            an array of indices, this array can be a list, numpy array
            or a LongArray.

        Notes
        -----

        Pseudo-code for the implementation::

            if index_list.length > number of particles
                raise ValueError

            sorted_indices <- index_list sorted in ascending order.

            for every every array in property_array
                array.remove(sorted_indices)

        """
        if len(indices) > self.get_number_of_particles():
            msg = ('Number of particles to be removed is greater than '
                   'the number of particles in the array')
            raise ValueError(msg)

        num_particles = self.get_number_of_particles()
        if_remove = Array(np.int32, n=num_particles, backend=self.backend)
        if_remove.fill(0)

        fill_if_remove_knl = self._get_remove_particles_kernel()
        fill_if_remove_knl(indices, if_remove, num_particles)

        self._remove_particles_bool(if_remove)
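
A small usage sketch of this device-side remove_particles. The import paths and the compyle wrap helper are assumptions; the index array is wrapped so that the marking kernel can read it on the device.

    # Usage sketch (assumed import paths; adapt to your install).
    import numpy as np
    from compyle.array import wrap
    from pysph.base.utils import get_particle_array
    from pysph.base.device_helper import DeviceHelper

    pa = get_particle_array(x=np.arange(5.0))
    pa.set_device_helper(DeviceHelper(pa))

    # Device-resident indices; the kernel marks the matching slots in
    # the if_remove mask before the arrays are compacted.
    indices = wrap(np.array([0, 2], dtype=np.int64), backend=pa.gpu.backend)
    pa.gpu.remove_particles(indices)
    assert pa.gpu.get_number_of_particles() == 3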
Example #3
    def remove_particles(self, indices, align=True):
        """ Remove particles whose indices are given in index_list.

        Parameters
        ----------

        indices : array
            an array of indices, this array can be a list, numpy array
            or a LongArray.

        """
        if len(indices) > self.get_number_of_particles():
            msg = ('Number of particles to be removed is greater than '
                   'the number of particles in the array')
            raise ValueError(msg)

        num_particles = self.get_number_of_particles()
        if_remove = Array(np.int32, n=num_particles, backend=self.backend)
        if_remove.fill(0)

        fill_if_remove_knl = self._get_remove_particles_kernel()
        fill_if_remove_knl(indices, if_remove, num_particles)

        self._remove_particles_bool(if_remove, align=align)
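
The align flag is what distinguishes this variant from the previous example: passing align=False defers re-alignment so that several removals can be batched and aligned once. A sketch continuing the assumptions of the previous example:

    # Batch removals, then align once at the end (sketch).
    indices_a = wrap(np.array([0], dtype=np.int64), backend=pa.gpu.backend)
    indices_b = wrap(np.array([1], dtype=np.int64), backend=pa.gpu.backend)
    pa.gpu.remove_particles(indices_a, align=False)
    pa.gpu.remove_particles(indices_b, align=False)
    pa.gpu.align_particles()  # single alignment pass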
Example #4
    def _build_tree(self, fixed_depth=None):
        # We build the tree one layer at a time. We stop building new
        # layers once all the nodes are leaves or the target depth
        # (fixed_depth) is reached.
        # At this point, the information for each layer is segmented / not
        # contiguous in memory, and so we run a merge_layers procedure to
        # move the data for all layers into a single array.
        #
        # The procedure for building each layer can be split up as follows
        # 1) Determine which child each particle is going to belong to in the
        #    next layer
        # 2) Perform a kind of segmented scan over this. This gives us the
        #    new order of the particles so that consecutive particles lie in
        #    the same child
        # 3) Reorder the particles based on this order
        # 4) Create a new layer and set the node data for the new layer. We
        #    get to know which particles belong to each node directly from the
        #    results of step 2
        # 5) Set the predicted offsets of the children of the nodes in the
        #    new layer. If a node has fewer than leaf_size particles, it's a
        #    leaf. A kind of prefix sum over this directly lets us know the
        #    predicted offsets.
        # Rinse and repeat for building more layers.
        #
        # Note that after building the last layer, the predicted offsets for
        # the children might not be correct since we're not going to build
        # more layers. The _merge_layers procedure sets the offsets in the
        # last layer to -1 to correct this.

        num_leaves_here = 0
        n = self.n
        temp_vars = {}

        self.depth = 0
        self.num_nodes = [1]

        # Cumulative sum of nodes in the previous layers
        csum_nodes_prev = 0
        csum_nodes = 1

        # Initialize temporary data (but persistent across layers)
        self._create_temp_vars(temp_vars)

        child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                       n=n,
                                       backend='opencl')

        seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
        seg_flag.fill(0)
        seg_flag.dev[0] = 1

        offsets_temp = [Array(np.int32, n=1, backend='opencl')]
        offsets_temp[-1].fill(1)

        pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
        pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

        # FIXME: Depths above 20 are possible and feasible for binary /
        # quad trees
        loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

        for depth in range(1, loop_lim):
            num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
            if num_nodes == 0:
                break
            self.depth += 1
            self.num_nodes.append(num_nodes)

            # Allocate new layer
            offsets_temp.append(
                Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
            pbounds_temp.append(
                Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                      backend='opencl'))

            # Generate particle index and reorder the particles
            self._reorder_particles(depth, child_count_prefix_sum,
                                    offsets_temp[-2], pbounds_temp[-2],
                                    seg_flag, csum_nodes_prev, temp_vars)

            num_leaves_here = self._update_node_data(
                offsets_temp[-2], pbounds_temp[-2], offsets_temp[-1],
                pbounds_temp[-1], seg_flag, child_count_prefix_sum, csum_nodes,
                csum_nodes + self.num_nodes[-1], n)

            csum_nodes_prev = csum_nodes
            csum_nodes += self.num_nodes[-1]

        self._merge_layers(offsets_temp, pbounds_temp)
        self._clean_temp_vars(temp_vars)
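
To make the layer-by-layer procedure described in the comments concrete, here is a small pure-NumPy illustration of the same idea for a 1D binary tree (k=2) over the unit interval. This is a conceptual sketch only; the real implementation runs these steps as OpenCL kernels over segmented arrays.

    import numpy as np

    def build_layers(x, leaf_size=2, max_depth=8):
        # x: particle positions in [0, 1).
        n = len(x)
        order = np.arange(n)            # particle permutation, like pids
        node_extent = [(0.0, 1.0)]      # spatial extent of each node
        pbounds = [[(0, n)]]            # (start, end) per node, per layer
        for _ in range(1, max_depth):
            new_extent, new_pb = [], []
            for (lo, hi), (s, e) in zip(node_extent, pbounds[-1]):
                if e - s <= leaf_size:  # leaf: do not subdivide
                    continue
                mid = 0.5 * (lo + hi)
                # Steps 1-3: pick a child for each particle, then reorder
                # so particles of the same child are contiguous.
                seg = order[s:e]
                child = (x[seg] > mid).astype(int)
                seg = seg[np.argsort(child, kind='stable')]
                order[s:e] = seg
                split = s + int(np.sum(child == 0))
                # Steps 4-5: record the children's extents and pbounds.
                new_extent += [(lo, mid), (mid, hi)]
                new_pb += [(s, split), (split, e)]
            if not new_pb:              # every node was a leaf
                break
            node_extent = new_extent
            pbounds.append(new_pb)
        return order, pbounds

    order, pbounds = build_layers(np.random.random(16))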
Example #5
class Tree(object):
    """k-ary Tree
    """
    def __init__(self, n, k=8, leaf_size=32):
        self.ctx = get_context()
        self.queue = get_queue()
        self.sorted = False
        self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

        self.initialized = False
        self.preamble = ""
        self.leaf_size = leaf_size
        self.k = k
        self.n = n
        self.depth = 0

        self.index_function_args = []
        self.index_function_arg_ctypes = []
        self.index_function_arg_dtypes = []
        self.index_function_consts = []
        self.index_function_const_ctypes = []
        self.index_code = ""

        self.set_index_function_info()

    def set_index_function_info(self):
        raise NotImplementedError

    def get_data_args(self):
        return [getattr(self, v) for v in self.index_function_args]

    def get_index_constants(self, depth):
        raise NotImplementedError

    def _initialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids = Array(np.uint32, n=num_particles, backend='opencl')
        self.cids.fill(0)

        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            setattr(self, var, Array(dtype, n=num_particles, backend='opencl'))

        # Filled in after the tree is built
        self.pbounds = None
        self.offsets = None
        self.initialized = True

    def _reinitialize_data(self):
        self.sorted = False
        num_particles = self.n
        self.pids.resize(num_particles)
        self.cids.resize(num_particles)
        self.cids.fill(0)

        for var in self.index_function_args:
            getattr(self, var).resize(num_particles)

        # Filled in after the tree is built
        self.pbounds = None
        self.offsets = None

    def _setup_build(self):
        if not self.initialized:
            self._initialize_data()
        else:
            self._reinitialize_data()

    def _build(self, fixed_depth=None):
        self._build_tree(fixed_depth)

    ###########################################################################
    # Core construction algorithm and helper functions
    ###########################################################################

    # A little bit of manual book-keeping for temporary variables.
    # More specifically, these temporary variables would otherwise be
    # thrown away after building each layer of the tree.
    # We could instead just allocate new arrays for each layer and let
    # the GC take care of them, but reusing the buffers is presumably a
    # better approach to save on memory.
    def _create_temp_vars(self, temp_vars):
        n = self.n
        temp_vars['pids'] = Array(np.uint32, n=n, backend='opencl')
        for var, dtype in zip(self.index_function_args,
                              self.index_function_arg_dtypes):
            temp_vars[var] = Array(dtype, n=n, backend='opencl')
        temp_vars['cids'] = Array(np.uint32, n=n, backend='opencl')

    def _exchange_temp_vars(self, temp_vars):
        for k in temp_vars.keys():
            t = temp_vars[k]
            temp_vars[k] = getattr(self, k)
            setattr(self, k, t)

    def _clean_temp_vars(self, temp_vars):
        for k in list(temp_vars.keys()):
            del temp_vars[k]

    def _get_temp_data_args(self, temp_vars):
        result = [temp_vars[v] for v in self.index_function_args]
        return result

    def _reorder_particles(self, depth, child_count_prefix_sum, offsets_parent,
                           pbounds_parent, seg_flag, csum_nodes_prev,
                           temp_vars):
        # Scan

        args = [('__global ' + ctype + ' *' + v) for v, ctype in zip(
            self.index_function_args, self.index_function_arg_ctypes)]
        args += [(ctype + ' ' + v) for v, ctype in zip(
            self.index_function_consts, self.index_function_const_ctypes)]
        args = ', '.join(args)

        particle_kernel = _get_particle_kernel(self.ctx, self.k, args,
                                               self.index_code)
        args = [seg_flag.dev, child_count_prefix_sum.dev]
        args += [x.dev for x in self.get_data_args()]
        args += self.get_index_constants(depth)
        particle_kernel(*args)

        # Reorder particles
        reorder_particles = self.main_helper.get_kernel(
            'reorder_particles',
            k=self.k,
            data_vars=tuple(self.index_function_args),
            data_var_ctypes=tuple(self.index_function_arg_ctypes),
            const_vars=tuple(self.index_function_consts),
            const_var_ctypes=tuple(self.index_function_const_ctypes),
            index_code=self.index_code)

        args = [
            self.pids.dev, self.cids.dev, seg_flag.dev, pbounds_parent.dev,
            offsets_parent.dev, child_count_prefix_sum.dev,
            temp_vars['pids'].dev, temp_vars['cids'].dev
        ]
        args += [x.dev for x in self.get_data_args()]
        args += [x.dev for x in self._get_temp_data_args(temp_vars)]
        args += self.get_index_constants(depth)
        args += [np.uint32(csum_nodes_prev)]

        reorder_particles(*args)
        self._exchange_temp_vars(temp_vars)

    def _merge_layers(self, offsets_temp, pbounds_temp):
        curr_offset = 0
        total_nodes = 0

        for i in range(self.depth + 1):
            total_nodes += self.num_nodes[i]

        self.offsets = Array(np.int32, n=total_nodes, backend='opencl')
        self.pbounds = Array(cl.cltypes.uint2, n=total_nodes, backend='opencl')

        append_layer = self.main_helper.get_kernel('append_layer')

        self.total_nodes = total_nodes
        for i in range(self.depth + 1):
            append_layer(offsets_temp[i].dev, pbounds_temp[i].dev,
                         self.offsets.dev, self.pbounds.dev,
                         np.int32(curr_offset), np.uint8(i == self.depth))
            curr_offset += self.num_nodes[i]

    def _update_node_data(self, offsets_prev, pbounds_prev, offsets, pbounds,
                          seg_flag, child_count_prefix_sum, csum_nodes,
                          csum_nodes_next, n):
        """Update node data and return number of children which are leaves."""

        # Update particle-related data of children
        set_node_data = self.main_helper.get_kernel("set_node_data", k=self.k)
        set_node_data(offsets_prev.dev, pbounds_prev.dev, offsets.dev,
                      pbounds.dev, seg_flag.dev, child_count_prefix_sum.dev,
                      np.uint32(csum_nodes), np.uint32(n))

        # Set children offsets
        leaf_count = Array(np.uint32, n=1, backend='opencl')
        set_offsets = _get_set_offset_kernel(self.ctx, self.k, self.leaf_size)
        set_offsets(pbounds.dev, offsets.dev, leaf_count.dev,
                    np.uint32(csum_nodes_next))
        return leaf_count.dev[0].get()

    def _build_tree(self, fixed_depth=None):
        # We build the tree one layer at a time. We stop building new
        # layers once all the nodes are leaves or the target depth
        # (fixed_depth) is reached.
        # At this point, the information for each layer is segmented / not
        # contiguous in memory, and so we run a merge_layers procedure to
        # move the data for all layers into a single array.
        #
        # The procedure for building each layer can be split up as follows
        # 1) Determine which child each particle is going to belong to in the
        #    next layer
        # 2) Perform a kind of segmented scan over this. This gives us the
        #    new order of the particles so that consecutive particles lie in
        #    the same child
        # 3) Reorder the particles based on this order
        # 4) Create a new layer and set the node data for the new layer. We
        #    get to know which particles belong to each node directly from the
        #    results of step 2
        # 5) Set the predicted offsets of the children of the nodes in the
        #    new layer. If a node has fewer than leaf_size particles, it's a
        #    leaf. A kind of prefix sum over this directly lets us know the
        #    predicted offsets.
        # Rinse and repeat for building more layers.
        #
        # Note that after building the last layer, the predicted offsets for
        # the children might not be correct since we're not going to build
        # more layers. The _merge_layers procedure sets the offsets in the
        # last layer to -1 to correct this.

        num_leaves_here = 0
        n = self.n
        temp_vars = {}

        self.depth = 0
        self.num_nodes = [1]

        # Cumulative sum of nodes in the previous layers
        csum_nodes_prev = 0
        csum_nodes = 1

        # Initialize temporary data (but persistent across layers)
        self._create_temp_vars(temp_vars)

        child_count_prefix_sum = Array(get_vector_dtype('uint', self.k),
                                       n=n,
                                       backend='opencl')

        seg_flag = Array(cl.cltypes.char, n=n, backend='opencl')
        seg_flag.fill(0)
        seg_flag.dev[0] = 1

        offsets_temp = [Array(np.int32, n=1, backend='opencl')]
        offsets_temp[-1].fill(1)

        pbounds_temp = [Array(cl.cltypes.uint2, n=1, backend='opencl')]
        pbounds_temp[-1].dev[0].set(cl.cltypes.make_uint2(0, n))

        # FIXME: Depths above 20 are possible and feasible for binary /
        # quad trees
        loop_lim = 20 if fixed_depth is None else min(fixed_depth, 20)

        for depth in range(1, loop_lim):
            num_nodes = self.k * (self.num_nodes[-1] - num_leaves_here)
            if num_nodes == 0:
                break
            self.depth += 1
            self.num_nodes.append(num_nodes)

            # Allocate new layer
            offsets_temp.append(
                Array(np.int32, n=self.num_nodes[-1], backend='opencl'))
            pbounds_temp.append(
                Array(cl.cltypes.uint2, n=self.num_nodes[-1],
                      backend='opencl'))

            # Generate particle index and reorder the particles
            self._reorder_particles(depth, child_count_prefix_sum,
                                    offsets_temp[-2], pbounds_temp[-2],
                                    seg_flag, csum_nodes_prev, temp_vars)

            num_leaves_here = self._update_node_data(
                offsets_temp[-2], pbounds_temp[-2], offsets_temp[-1],
                pbounds_temp[-1], seg_flag, child_count_prefix_sum, csum_nodes,
                csum_nodes + self.num_nodes[-1], n)

            csum_nodes_prev = csum_nodes
            csum_nodes += self.num_nodes[-1]

        self._merge_layers(offsets_temp, pbounds_temp)
        self._clean_temp_vars(temp_vars)

    ###########################################################################
    # Misc
    ###########################################################################

    def _get_unique_cids_and_count(self):
        n = self.n
        self.unique_cids = Array(np.uint32, n=n, backend='opencl')
        self.unique_cids_map = Array(np.uint32, n=n, backend='opencl')
        uniq_count = Array(np.uint32, n=1, backend='opencl')
        unique_cids_kernel = _get_unique_cids_kernel(self.ctx)
        unique_cids_kernel(self.cids.dev, self.unique_cids_map.dev,
                           self.unique_cids.dev, uniq_count.dev)
        self.unique_cid_count = uniq_count.dev[0].get()

    def get_leaves(self):
        leaves = Array(np.uint32,
                       n=self.offsets.dev.shape[0],
                       backend='opencl')
        num_leaves = Array(np.uint32, n=1, backend='opencl')
        leaves_kernel = _get_leaves_kernel(self.ctx, self.leaf_size)
        leaves_kernel(self.offsets.dev, self.pbounds.dev, leaves.dev,
                      num_leaves.dev)

        num_leaves = num_leaves.dev[0].get()
        return leaves.dev[:num_leaves], num_leaves

    def _sort(self):
        """Set tree as being sorted

        The particle array needs to be aligned by the caller!
        """
        if not self.sorted:
            self.sorted = True

    ###########################################################################
    # Tree API
    ###########################################################################
    def allocate_node_prop(self, dtype):
        return Array(dtype, n=self.total_nodes, backend='opencl')

    def allocate_leaf_prop(self, dtype):
        return Array(dtype, n=int(self.unique_cid_count), backend='opencl')

    def get_preamble(self):
        if self.sorted:
            return "#define PID(idx) (idx)"
        else:
            return "#define PID(idx) (pids[idx])"

    def get_leaf_size_partitions(self, group_min, group_max):
        """Partition leaves based on leaf size

        Parameters
        ----------
        group_min
            Minimum leaf size
        group_max
            Maximum leaf size

        Returns
        -------
        groups : Array
            An array which contains the cell ids of leaves
            with leaf size > group_min and leaf size <= group_max
        group_count : int
            The number of leaves which satisfy the given condition
            on the leaf size
        """
        groups = Array(np.uint32,
                       n=int(self.unique_cid_count),
                       backend='opencl')
        group_count = Array(np.uint32, n=1, backend='opencl')

        get_cid_groups = _get_cid_groups_kernel(self.ctx)
        get_cid_groups(self.unique_cids.dev[:self.unique_cid_count],
                       self.pbounds.dev, groups.dev, group_count.dev,
                       np.int32(group_min), np.int32(group_max))
        result = groups, int(group_count.dev[0].get())
        return result

    def tree_bottom_up(self,
                       args,
                       setup,
                       leaf_operation,
                       node_operation,
                       output_expr,
                       preamble=""):
        return tree_bottom_up(self.ctx, args, setup, leaf_operation,
                              node_operation, output_expr, preamble)

    def leaf_tree_traverse(self,
                           args,
                           setup,
                           node_operation,
                           leaf_operation,
                           output_expr,
                           common_operation="",
                           preamble=""):
        """
        Traverse this (source) tree. One thread for each leaf of
        destination tree.
        """

        return leaf_tree_traverse(self.ctx, self.k, args, setup,
                                  node_operation, leaf_operation, output_expr,
                                  common_operation, preamble)

    def point_tree_traverse(self,
                            args,
                            setup,
                            node_operation,
                            leaf_operation,
                            output_expr,
                            common_operation="",
                            preamble=""):
        """
        Traverse this (source) tree. One thread for each particle of
        destination tree.
        """

        return point_tree_traverse(self.ctx, self.k, args, setup,
                                   node_operation, leaf_operation, output_expr,
                                   common_operation, preamble)
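
Tree is abstract: set_index_function_info and get_index_constants must be supplied by a subclass. Below is a hypothetical minimal 1D subclass, purely to show how the hooks fit together; the names, the index expression, and the assumption that the mako template exposes the particle index as i are illustrative guesses, not the library's shipped octree.

    import numpy as np

    class BinaryTree1D(Tree):
        """Hypothetical 1D binary tree; illustrative only."""

        def set_index_function_info(self):
            # One per-particle data array (x) and one per-depth constant
            # (cell_size); these feed the generated OpenCL kernels.
            self.index_function_args = ['x']
            self.index_function_arg_ctypes = ['float']
            self.index_function_arg_dtypes = [np.float32]
            self.index_function_consts = ['cell_size']
            self.index_function_const_ctypes = ['float']
            # OpenCL expression mapping a particle to a child in [0, k).
            self.index_code = "((uint) floor(x[i] / cell_size)) % 2"

        def get_index_constants(self, depth):
            # Cell size halves at every level of a unit domain.
            return [np.float32(1.0 / (1 << depth))]

    # Requires a working OpenCL context, e.g.:
    # tree = BinaryTree1D(n=1024, k=2, leaf_size=32)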