def gradient_field(data):
    """Run the gradient kernel over a 3D volume and return the result.

    Input:
        data -- array-like with at least three axes; it is copied into a
                numpy array and given a trailing axis of length 1 before
                being uploaded as a CLReal.

    Returns the ``value`` of the CLReal4 variable that the kernel built
    from ``_GRADIENT_SRC`` writes to. That variable is created with the
    same shape as the uploaded volume.
    """
    samples = np.array(data)
    # Append a singleton axis in place (no copy), as the CL variable expects.
    samples.shape = (samples.shape[0], samples.shape[1], samples.shape[2], 1)

    cl_volume = CLReal()
    cl_volume.value = samples
    cl_gradient = CLReal4(*cl_volume.shape)

    gradient_kernel = CLTemplateKernel(src=_GRADIENT_SRC)
    gradient_kernel.volume = cl_volume
    gradient_kernel.gradient = cl_gradient
    gradient_kernel.compile()
    gradient_kernel()

    return cl_gradient.value
class KD_tree(object):
    """k-d tree over 4-component points with an OpenCL nearest-neighbor query.

    The tree itself is built on the host by ``kd_c.get_tree``; its arrays
    are uploaded as CL variables and bound to a kernel compiled from
    ``_GET_NN_SRC``.
    """

    def __init__(self, pts, max_depth=None):
        """Build the tree and compile the nearest-neighbor kernel.

        Input:
            pts       -- numpy ndarray of shape (length, 4).
            max_depth -- value emitted as ``#define MAX_DEPTH`` for the
                         kernel. Defaults to log2(len(pts)) + 2.

        Raises ValueError if pts is not two-dimensional with 4 columns.
        """
        # Both conditions must hold, so reject when EITHER fails. The check
        # short-circuits, so 1-D input never reaches pts.shape[1].
        if len(pts.shape) != 2 or pts.shape[1] != 4:
            raise ValueError('pts must be a numpy ndarray with shape (length,4).')
        if max_depth is None:
            max_depth = math.log(len(pts), 2) + 2

        self._points = CLReal()
        self._points.value = self._as_column(np.array(pts))

        # get_tree yields the tree arrays in the order they are assigned
        # below: id, parent, cdim, pt, lo, hi, vmin, vmax, cval.
        data = [self._as_column(arr) for arr in kd_c.get_tree(pts)]

        self._id = CLInt()
        self._parent = CLInt()
        self._cdim = CLInt()
        self._pt = CLInt()
        self._lo = CLInt()
        self._hi = CLInt()
        self._vmax = CLReal()
        self._vmin = CLReal()
        self._cval = CLReal()
        self._offset = CLScalar(0)

        self._id.value = data[0]
        self._parent.value = data[1]
        self._cdim.value = data[2]
        self._pt.value = data[3]
        self._lo.value = data[4]
        self._hi.value = data[5]
        self._vmin.value = data[6]
        self._vmax.value = data[7]
        self._cval.value = data[8]

        # Buffers used by get_nn(); they are (re)filled on every query.
        self._query_points = CLReal4()
        self._neigh_i = CLInt()
        self._neigh_d = CLReal()

        # Prepare the kernel.
        pre = "#define MAX_DEPTH " + str(int(max_depth))
        self._get_nn_krn = CLTemplateKernel(pre=pre, src=_GET_NN_SRC)
        bindings = (
            ('query_points', self._query_points),
            ('points', self._points),
            ('id', self._id),
            ('parent', self._parent),
            ('cdim', self._cdim),
            ('pt', self._pt),
            ('lo', self._lo),
            ('hi', self._hi),
            ('vmin', self._vmin),
            ('vmax', self._vmax),
            ('cval', self._cval),
            ('neigh_i', self._neigh_i),
            ('neigh_d', self._neigh_d),
            ('offset', self._offset),
        )
        for name, var in bindings:
            setattr(self._get_nn_krn, name, var)
        self._get_nn_krn.compile()

    @staticmethod
    def _as_column(arr):
        """Return a flattened copy of arr shaped (n, 1)."""
        column = arr.flatten()
        # Assign the shape in place so the result still owns its data.
        column.shape = (column.shape[0], 1)
        return column

    def get_nn(self, pts, cl_ref=False, chunksize=100000):
        """Run the nearest-neighbor kernel for every point in pts.

        The query is executed in chunks of at most ``chunksize`` work
        items; the kernel receives the start of each chunk via its
        ``offset`` scalar.

        Input:
            pts       -- query points, uploaded as a CLReal4 value.
            cl_ref    -- if True return the CL variables themselves,
                         otherwise their host-side ``value``.
            chunksize -- maximum number of points per kernel launch.

        Returns a pair (neighbor indices, neighbor distances), each with
        one row per query point.
        """
        len_pts = len(pts)
        chunks = int(math.ceil(float(len_pts) / float(chunksize)))

        self._neigh_i.value = np.zeros((len_pts, 1), np.int32)
        self._neigh_d.value = np.zeros((len_pts, 1), np.float64)
        self._query_points.value = pts

        for i in range(chunks):
            # Single-argument form prints identically on Python 2 and 3.
            print('computing chunk %s' % i)
            self._offset.value = i * chunksize
            # Only the last chunk can be shorter than chunksize.
            execsize = min(chunksize, len_pts - i * chunksize)
            self._get_nn_krn.exec_with_size((execsize,))
            # Wait for this chunk before the offset is changed again.
            self._get_nn_krn.finish()

        if cl_ref:
            return self._neigh_i, self._neigh_d
        return self._neigh_i.value, self._neigh_d.value

    def get_data(self):
        """Return the tree arrays as a list.

        Order: id, parent, cdim, pt, lo, hi, vmin, vmax, cval.
        """
        return [
            self._id.value,
            self._parent.value,
            self._cdim.value,
            self._pt.value,
            self._lo.value,
            self._hi.value,
            self._vmin.value,
            self._vmax.value,
            self._cval.value,
        ]
class Syncgroup(object):
    """Group of CL variables kept at a common length along one axis.

    A group can additionally be sorted by an index variable, see
    setup_sort() and sort(). Sorting is a counting sort: the fill-grid
    kernel counts items per index value, the grid is scanned, and the
    copy kernels move every member to its sorted position.
    """

    # Kernel templates. The ``$name`` placeholders are substituted with
    # variable names via str.replace; the ``$name$`` form is handed to
    # CLTemplateKernel unchanged.
    _fgrid_kern_src = """
        int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
        uint z_index = $index[gid];
        $celloffset[gid] = atomic_inc(&$grid[z_index]);
    """

    _csort_str = """
        int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
        int gsz = get_global_size(0)*get_global_size(1)*get_global_size(2);
        uint theindex = $index[gid];
        int dest = $grid[theindex] - $celloffset[gid] - 1;
        if ((dest >= 0) && (dest < gsz)){
            $copyblock
        }
    """

    _reset_grid_str = """
        int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
        $grid$[gid] = 0;
    """

    _reset_co_str = """
        int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
        $celloffset$[gid] = 0;
    """

    def __init__(self, *args, **kwargs):
        """Create a new Syncgroup.

        All members of the group are automatically synced in length. If the
        'axis' kwarg is provided, variables are synced along that axis.

        Input:
            args   -- an arbitrary number of CLVar objects
            kwargs -- 'axis' selects the axis to keep in sync.
                      Must be 0, 1 or 2 (default 0).

        Raises ValueError if a variable is not in the 'global' address
        space or if axis is out of range.
        """
        self._vars = list(args)
        self._vars_copy = []  # needed for sorting
        self._sort_enabeld = False  # needed for sorting

        for var in self._vars:
            if not var._addspc == 'global':
                raise ValueError('Only global variables can be synced.')

        ax = kwargs.pop('axis', 0)
        if not 0 <= ax < 3:
            raise ValueError('Axis can only be 0, 1 or 2.')
        # Store the requested axis; it used to be reset to 0 unconditionally.
        self._axis = ax

        maxlength = 0
        mlvar = None
        for var in self._vars:
            var._register_grp(self)
            if var._value.shape[self._axis] > maxlength:
                maxlength = var._value.shape[self._axis]
                mlvar = var
        # With no members, or only zero-length ones, everything already
        # agrees and there is no variable to sync to.
        if mlvar is not None:
            self.sync(mlvar)

    def sync(self, caller):
        """Sync group to length of caller (along the specified axis).

        Every member other than caller, including the internal sort
        copies, is read back, resized in place and written again without
        triggering another sync.
        """
        for members in (self._vars, self._vars_copy):
            for var in members:
                if not (var == caller):
                    shp = list(var._value.shape)
                    shp[self._axis] = caller._value.shape[self._axis]
                    var._read_buffer()
                    var._value.resize(shp)
                    var.set_value(var._value, dontsync=True)

    def add(self, *args, **kwargs):
        """Add variables to the group and re-sync to the longest member.

        Variables that are already members are ignored. If sorting is
        enabled the sort copies and copy kernels are rebuilt.
        """
        for var in args:
            if var not in self._vars:
                var._register_grp(self)
                self._vars.append(var)

        maxlength = 0
        mlvar = None
        for var in self._vars:
            if var._value.shape[self._axis] > maxlength:
                maxlength = var._value.shape[self._axis]
                mlvar = var
        # mlvar stays None when all members have length 0; nothing to sync.
        if mlvar is not None:
            self.sync(mlvar)

        if self._sort_enabeld:
            self._setup_sortcopy()

    def setup_sort(self, index):
        """Setup the sorting algorithm.

        Index must be a CLVar of CLUInt type with dim=1, e.g.
        shape=(length,). The whole group will be sorted according to the
        index values.

        Input:
            index -- the index to sort the group. It is added to the group
                     if it is not a member yet.
        """
        if index not in self._vars:
            self.add(index)

        # Setup vars: one grid cell per possible index value.
        self._sortindex = index
        self._grid = CLUInt(int(max_reduce(self._sortindex)) + 1)
        self._celloffset = CLUInt()
        self.add(self._celloffset)

        # Fill-grid kernel.
        src = Syncgroup._fgrid_kern_src
        src = src.replace('$index', self._sortindex.name)
        src = src.replace('$celloffset', self._celloffset.name)
        src = src.replace('$grid', self._grid.name)
        varlist = [self._sortindex, self._grid, self._celloffset]
        self._fillgridkern = CLKernel(src=src, varlist=varlist, name='fgridkern')

        self._setup_sortcopy()

        self._reset_grid_kern = CLTemplateKernel(src=Syncgroup._reset_grid_str,
                                                 name='reset_grid')
        self._reset_grid_kern.grid = self._grid
        self._reset_grid_kern.compile()

        self._reset_co_kern = CLTemplateKernel(src=Syncgroup._reset_co_str,
                                               name='reset_co')
        self._reset_co_kern.celloffset = self._celloffset
        self._reset_co_kern.compile()

        self._sort_enabeld = True

    def _setup_sortcopy(self):
        """Create a scratch copy of every member and build the copy kernels.

        The sort-copy kernel scatters each member into its scratch copy at
        the sorted position; the back-copy kernel copies the scratch data
        back element by element.
        """
        self._vars_copy = [CLVar.var_like(var) for var in self._vars]
        varlist = self._vars + self._vars_copy + [self._grid]

        # One assignment line per member for each of the two kernels.
        copy_lines = []
        backcopy_lines = []
        for var, scratch in zip(self._vars, self._vars_copy):
            copy_lines.append(' ' + scratch.name + '[dest] = ' + var.name + '[gid];\n')
            backcopy_lines.append(' ' + var.name + '[gid] = ' + scratch.name + '[gid];\n')
        copyblock = ''.join(copy_lines)
        backcopyblock = ''.join(backcopy_lines)

        src = Syncgroup._csort_str
        src = src.replace('$index', self._sortindex.name)
        src = src.replace('$celloffset', self._celloffset.name)
        src = src.replace('$grid', self._grid.name)
        src = src.replace('$copyblock', copyblock)
        self._sortcopykern = CLKernel(src=src, varlist=varlist, name='sortcopykern')

        backcopystr = """int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);\n\n"""
        backcopystr += backcopyblock
        self._backcopykern = CLKernel(src=backcopystr, varlist=varlist, name='backcopykern')

    def sort(self):
        """Sort the group.

        Sorting must be enabled before, see setup_sort(). Error messages
        may be unhelpful otherwise, as correct sorting setup is not checked
        when sort() is called (performance reasons).
        """
        # Resize the grid (its size depends on the largest index value),
        # then reset grid and celloffset to 0.
        self._grid.set_shape_wo_read((int(max_reduce(self._sortindex)) + 1,))
        self._reset_grid_kern()
        self._reset_co_kern()
        # Fill grid.
        self._fillgridkern()
        # Scan grid on the GPU.
        scan_uint(self._grid)
        # Sort-copy into the scratch variables.
        self._sortcopykern()
        # Copy back.
        self._backcopykern()
class Neighsearch(object):
    """Spatial fixed-radius neighbor search on a Z-order indexed grid.

    The find kernel visits the 3x3x3 block of grid cells around each item
    and records the indices of other items whose distance is below h.
    """

    # Kernel prologue: per-item cell coordinates, position and counters.
    _find_kern_pre = """
        int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
        int gsz = get_global_size(0)*get_global_size(1)*get_global_size(2);
        int xind = $ind4[gid].s0;
        int yind = $ind4[gid].s1;
        int zind = $ind4[gid].s2;
        real4 this_pos = $posarg[gid];
        int id, xc, yc, zc, start;
        start = 0;
        int counter = 0;
    """

    # Kernel body for ONE neighboring cell, selected by (xc, yc, zc).
    _find_kern_body = """
        id = interleave3_16(xind + xc, yind + yc, zind + zc);
        if ((id < $grid_length) && (id >= 0)){
            start = $grid[id] - 1;
            int len_cell = 0;
            if (id == 0){
                len_cell = start + 1;
            }else{
                len_cell = (start + 1) - $grid[id - 1];
            }
            if ((start >= 0) && (start < $posarg_length)){
                for (int i = 0; i < len_cell; i++){
                    if (counter < $maxneighs){
                        int other_id = start - i;
                        if (other_id != gid){
                            real dist = length(this_pos - $posarg[other_id]);
                            if (dist < $h){
                                $neighbors[gid*$maxneighs + counter] = other_id;
                                counter++;
                            }
                        }
                    }
                }
            }
        }
    """

    # Unroll the body over all 27 cell offsets in {-1, 0, 1}^3. Kept as
    # plain loops: a comprehension body cannot see class-scope names.
    _find_kern_str = _find_kern_pre
    for i in range(3):
        for j in range(3):
            for k in range(3):
                _find_kern_str += "xc = " + str(i - 1) + ";\n"
                _find_kern_str += "yc = " + str(j - 1) + ";\n"
                _find_kern_str += "zc = " + str(k - 1) + ";\n\n"
                _find_kern_str += _find_kern_body

    _reset_kern_str = """
        int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
        $neighbors$[gid] = -1;
    """

    def __init__(self, posarg, group=None, h=1.0, maxneighs=100, index=None,
                 indvec=None, neigharray=None):
        """Initialize spatial fixed radius neighbor search.

        The spatial domain is divided into cells and items are ordered by
        Z-order index using the parallel counting sort of the group (see
        Syncgroup.sort() and ZIndex for details).

        Input:
            posarg     -- the positional argument, must be a 3d vector,
                          e.g. CLFloat4, CLDouble4 or CLReal4.
            group      -- the Syncgroup posarg belongs to; if not provided
                          an internal group is created (self._group).
            h          -- passed to ZIndex; the find kernel keeps items
                          whose distance is below the ZIndex's h value.
            maxneighs  -- maximum neighbor count per item.
            index      -- Z-order index. --> see ZIndex
            indvec     -- spatial index --> see ZIndex
            neigharray -- array receiving the neighbor indices, reshaped to
                          (len(index), maxneighs). If not specified a new
                          CLInt is created; either way it is available as
                          self.neighbors.

        Raises ValueError if posarg is not a member of group.
        """
        if group is None:
            group = Syncgroup(posarg)
        if posarg not in group._vars:
            raise ValueError("posarg must be member of group.")

        self._posarg = posarg
        self._group = group
        self._maxneighs = CLScalar(maxneighs)

        self._indexkernel = ZIndex(posarg=posarg, h=h, index=index, indvec=indvec)
        self._group.add(self._indexkernel._indvec)
        self._group.setup_sort(index=self._indexkernel._index)

        # Explicit None test: a supplied array must be used even if it is
        # currently empty and therefore falsy.
        if neigharray is not None:
            neigharray.shape = (self._indexkernel._index._value.size, maxneighs)
            self._neighbors = neigharray
        else:
            self._neighbors = CLInt(self._indexkernel._index._value.size, maxneighs)

        self._grid_length = CLScalar(len(self._group._grid))
        self._posarg_length = CLScalar(len(self._posarg))

        # Order matters: the *_length placeholders must be replaced before
        # the shorter placeholders they start with.
        src = Neighsearch._find_kern_str
        src = src.replace("$posarg_length", self._posarg_length.name)
        src = src.replace("$grid_length", self._grid_length.name)
        src = src.replace("$posarg", self._posarg.name)
        src = src.replace("$neighbors", self._neighbors.name)
        src = src.replace("$ind4", self._indexkernel._indvec.name)
        src = src.replace("$grid", self._group._grid.name)
        src = src.replace("$maxneighs", self._maxneighs.name)
        src = src.replace("$h", self._indexkernel._h.name)

        varlist = [
            self._posarg_length,
            self._grid_length,
            self._posarg,
            self._neighbors,
            self._indexkernel._indvec,
            self._group._grid,
            self._maxneighs,
            self._indexkernel._h,
        ]
        self._findkern = CLKernel(pre=ZIndex._il3_16_src, varlist=varlist, src=src,
                                  name="find_neighbors")

        self._reset_kern = CLTemplateKernel(src=Neighsearch._reset_kern_str,
                                            name="reset_neighbors")
        self._reset_kern.neighbors = self._neighbors
        self._reset_kern.maxneighs = self._maxneighs
        self._reset_kern.compile()

    @property
    def neighbors(self):
        """The CL variable holding the neighbor indices (-1 = unused slot)."""
        return self._neighbors

    def search(self, h=None, maxneighs=None):
        """Recompute the index, sort the group and find all neighbors.

        Input:
            h         -- forwarded to the ZIndex kernel call.
            maxneighs -- if given (and non-zero), the new maximum neighbor
                         count per item; the neighbor array is reshaped
                         accordingly.

        Results are written to self.neighbors.
        """
        self._indexkernel(h=h)
        item_count = self._indexkernel._index._value.size

        if maxneighs:
            self._maxneighs.value = maxneighs
            self._neighbors.set_shape_wo_read((item_count, maxneighs))
        elif not (item_count == self._neighbors.shape[0]):
            # Item count changed: keep the current per-item capacity.
            self._neighbors.set_shape_wo_read((item_count, self._maxneighs.value))

        self._reset_kern()
        self._group.sort()
        # The grid is resized by sort(), so refresh both length scalars.
        self._grid_length.value = len(self._group._grid)
        self._posarg_length.value = len(self._posarg)
        self._findkern()

    def info(self):
        """Return the info of the underlying ZIndex kernel."""
        return self._indexkernel.info()