def _draw(self, pts, colors):
    """Bin the given points on the GPU and draw the resulting dots.

    ``pts`` and ``colors`` are sequences of arrays; they are stacked
    row-wise into one coordinate array and one color array before being
    handed to the kernels.

    Returns False when ``pts`` is empty, or when ``cuda_agg`` leaves no
    index greater than -1. Otherwise the dots are drawn into
    ``self._img``, ``self._updated`` is set and True is returned.
    """
    if not pts:
        return False

    def _upload(host):
        # Allocate device memory sized for ``host`` and copy it over.
        device = cuda.mem_alloc(host.nbytes)
        cuda.memcpy_htod(device, host)
        return device

    imsize = self.imsize
    started = time()
    block_dim = (THREADS, 1, 1)

    ind_count = zeros(self.imsize2, npint)
    colors = row_stack(colors).astype(npfloat)
    xy = vstack(pts).astype(npfloat)
    inds = zeros(xy.shape[0], npint)

    total = inds.shape[0]
    self.cuda_agg(
        npint(total),
        npint(imsize),
        cuda.In(xy),
        cuda.InOut(inds),
        cuda.InOut(ind_count),
        block=block_dim,
        grid=(int(total // THREADS) + 1, 1),
    )

    # Only entries with an index above -1 take part in the drawing.
    keep = inds > -1
    if not keep.any():
        print('-- no dots to draw. time: {:0.4f}'.format(time() - started))
        return False

    inds = inds[keep]
    colors = colors[keep]
    kept = inds.shape[0]

    ind_count_map = _build_ind_count(ind_count)
    _ind_count_map = _upload(ind_count_map)

    # Zero-initialised device buffer with four values per kept point.
    _sort_colors = _upload(zeros((kept, 4), npfloat))

    self.cuda_agg_bin(
        npint(kept),
        _ind_count_map,
        cuda.In(colors),
        cuda.In(inds),
        _sort_colors,
        block=block_dim,
        grid=(int(kept // THREADS) + 1, 1),
    )

    dotn, _ = ind_count_map.shape
    self.cuda_dot(
        npint(dotn),
        self._img,
        _ind_count_map,
        _sort_colors,
        block=block_dim,
        grid=(int(dotn // THREADS) + 1, 1),
    )

    if self.verbose is not None:
        elapsed = time() - started
        print('-- drew dots: {:d}. time: {:0.4f}'.format(colors.shape[0],
                                                         elapsed))

    self._updated = True
    return True
def step(self):
    """Run one iteration: mass kernel, hit aggregation, then random growth.

    Increments ``self.itt``, launches ``cuda_mass`` and ``cuda_agg`` over
    the grid, calls ``self._diminish(0.2)`` and finally switches on a
    random 20% of the candidate cells in ``self.grid``.
    """
    import pycuda.driver as drv

    self.itt += 1

    grid = self.grid
    blocks = self.total_grid_size // self.threads + 1
    launch = {'block': (self.threads, 1, 1), 'grid': (blocks, 1)}

    self.cuda_mass(
        npint(self.total_grid_size),
        npint(self.grid_size),
        drv.In(grid),
        npint(self.influence_rad),
        drv.InOut(self.massx[:, :]),
        drv.InOut(self.massy[:, :]),
        drv.Out(self.neigh[:, :]),
        drv.Out(self.connected[:, :]),
        **launch
    )

    # Hits are accumulated from scratch on every step.
    self.hits[:, :] = 0
    self.cuda_agg(
        npint(self.total_grid_size),
        npint(self.grid_size),
        drv.In(grid),
        drv.In(self.massx[:, :]),
        drv.In(self.massy[:, :]),
        drv.In(self.neigh[:, :]),
        drv.InOut(self.hits[:, :]),
        **launch
    )

    self._diminish(0.2)

    # Candidates: not too crowded, hit at least once, more than one
    # connection.
    hit_and_connected = logical_and(self.hits, self.connected > 1)
    candidates = logical_and(self.neigh <= self.crowded_limit,
                             hit_and_connected)
    hi, hj = candidates.nonzero()

    positive = self.hits[hi, hj] > 0
    hi = hi[positive]
    hj = hj[positive]

    # Each remaining candidate is switched on with probability 0.2.
    chosen = random(size=len(hi)) < 0.2
    self.grid[hi[chosen], hj[chosen]] = True
def _draw(self, pts, colors):
    """Aggregate pre-computed pixel indices on the GPU and draw the dots.

    ``pts`` is a sequence of index arrays (concatenated and cast to
    ``npint``); ``colors`` is a sequence of color arrays stacked row-wise.
    Does nothing when ``pts`` is empty. On success the dots are drawn
    into ``self._img`` and ``self._updated`` is set. Returns None.
    """
    if not pts:
        return

    def _upload(host):
        # Allocate device memory sized for ``host`` and copy it over.
        device = cuda.mem_alloc(host.nbytes)
        cuda.memcpy_htod(device, host)
        return device

    imsize = self.imsize
    started = time()
    block_dim = (THREADS, 1, 1)

    ind_count = zeros(self.imsize2, npint)
    colors = row_stack(colors).astype(npfloat)

    inds = concatenate(pts).astype(npint)
    _inds = _upload(inds)
    aggn = inds.shape[0]
    agg_grid = (int(aggn // THREADS) + 1, 1)

    self.cuda_agg(
        npint(aggn),
        npint(imsize),
        _inds,
        cuda.InOut(ind_count),
        block=block_dim,
        grid=agg_grid,
    )

    ind_count_map = _build_ind_count(ind_count)
    _ind_count_map = _upload(ind_count_map)

    # Zero-initialised device buffer with four values per index.
    _sort_colors = _upload(zeros((aggn, 4), npfloat))

    self.cuda_agg_bin(
        npint(aggn),
        _ind_count_map,
        cuda.In(colors),
        _inds,
        _sort_colors,
        block=block_dim,
        grid=agg_grid,
    )

    dotn, _ = ind_count_map.shape
    self.cuda_dot(
        npint(dotn),
        self._img,
        _ind_count_map,
        _sort_colors,
        block=block_dim,
        grid=(int(dotn // THREADS) + 1, 1),
    )

    if self.verbose is not None:
        elapsed = time() - started
        print('-- drew dots: {:d}. time: {:0.4f}'.format(
            colors.shape[0], elapsed))

    self._updated = True
def __make_zonemap(self):
    """Rebuild the zone map for the first ``self.vnum`` vertices.

    A counting pass (``cuda_agg_count``) fills ``self.zone_num``; its
    maximum becomes the zone leap. ``self.zone_node`` is reallocated at
    twice the needed size when too small, then cleared, and a second pass
    (``cuda_agg``) fills it.

    Returns the tuple ``(zone_leap, self.zone_node, zone_num)``.
    """
    from pycuda.driver import In
    from pycuda.driver import InOut
    from pycuda.driver import Out

    vnum = self.vnum
    vxy = self.vxy
    zone = self.zone
    zone_num = self.zone_num
    launch = {
        'block': (self.threads, 1, 1),
        'grid': (vnum // self.threads + 1, 1),
    }

    # Pass 1: count per zone.
    zone_num[:] = 0
    self.cuda_agg_count(
        npint(vnum),
        npint(self.nz),
        In(vxy[:vnum, :]),
        InOut(zone_num),
        **launch
    )

    zone_leap = zone_num[:].max()
    needed = self.nz2 * zone_leap

    if needed > len(self.zone_node):
        # Grow to twice the required size to leave headroom.
        print('resize, new zone leap: ', needed * 2. / self.nz2)
        self.zone_node = zeros(needed * 2, npint)

    # Pass 2: start from clean buffers.
    self.zone_node[:] = 0
    zone_num[:] = 0
    self.cuda_agg(
        npint(vnum),
        npint(self.nz),
        npint(zone_leap),
        In(vxy[:vnum, :]),
        InOut(zone_num),
        InOut(self.zone_node),
        Out(zone[:vnum]),
        **launch
    )

    return zone_leap, self.zone_node, zone_num
def sample(self, imsize, verbose=False):
    """Sample ``self.num * grains`` points with the stroke kernel.

    The grain count comes from ``self._get_n(imsize)``. CUDA state is
    initialised first if ``self._cinit`` is falsy. ``verbose`` is
    accepted but not used here.

    Returns the ``(ng, 2)`` array written by ``_cuda_sample_stroke``.
    """
    if not self._cinit:
        self.__cuda_init()

    grains = self._get_n(imsize)
    ng = self.num * grains

    xy = zeros((ng, 2), npfloat)
    uniform = RGEN.gen_uniform(ng, npfloat)
    n_blocks = int(ng // THREADS + 1)

    _cuda_sample_stroke(
        npint(ng),
        self._ab,
        uniform,
        cuda.Out(xy),
        npint(grains),
        block=(THREADS, 1, 1),
        grid=(n_blocks, 1),
    )
    return xy
def update_zone_map(self):
    """Recompute the zone map for the first ``self.num`` points.

    Clears ``self.zone_num`` and runs ``cuda_agg``. Whenever ``self.itt``
    is a multiple of 100 it also asserts that the largest zone count is
    more than 100 below ``self.zone_leap`` and prints a confirmation.
    """
    num = self.num
    # The grid dimension is cast to a plain Python int; a numpy int is
    # reportedly not accepted here.
    n_blocks = int(num // self.threads + 1)

    self.zone_num[:] = 0
    self.cuda_agg(
        npint(self.nz),
        npint(self.zone_leap),
        npint(num),
        drv.In(self.xy[:num, :]),
        drv.InOut(self.zone_num),
        drv.Out(self.zone_node),
        block=(self.threads, 1, 1),
        grid=(n_blocks, 1),
    )

    if self.itt % 100:
        return

    m = self.zone_num.max()
    assert self.zone_leap - 100 > m, 'bad zone leap size'
    print('zone leap ok {:d}>{:d}'.format(self.zone_leap, m))
def inside(*args, **kwargs):
    """Call the wrapped ``f`` and optionally add circular noise.

    The first positional argument is treated as the owning instance.
    When its ``noise`` attribute is None the result of ``f`` is returned
    untouched; otherwise one offset per result row is sampled with
    ``_cuda_sample_circle`` (radius ``noise``, centre at the origin) and
    added to the result.
    """
    res = f(*args, **kwargs)
    owner = args[0]

    if owner.noise is None:
        return res

    rad = npfloat(owner.noise)
    ng = res.shape[0]
    offsets = zeros((ng, 2), npfloat)
    mid = zeros((1, 2), npfloat)
    uniform = RGEN.gen_uniform((ng, 3), npfloat)

    _cuda_sample_circle(
        npint(ng),
        uniform,
        cuda.Out(offsets),
        rad,
        cuda.In(mid),
        npint(ng),
        block=(THREADS, 1, 1),
        grid=(int(ng // THREADS + 1), 1),
    )
    return res + offsets
def __make_zonemap(self):
    """Build the vertex zone map in two kernel passes.

    First ``cuda_agg_count`` counts into ``self.zone_num``; the largest
    count is used as the zone leap. If ``self.zone_node`` cannot hold
    ``nz2 * zone_leap`` entries it is replaced by a zeroed array of twice
    that size. Both buffers are then cleared and ``cuda_agg`` fills them.

    Returns ``(zone_leap, self.zone_node, zone_num)``.
    """
    from pycuda.driver import In, InOut, Out

    vnum = self.vnum
    zone_num = self.zone_num
    block_dim = (self.threads, 1, 1)
    grid_dim = (vnum // self.threads + 1, 1)

    def active_vertices():
        # Fresh view of the vertices currently in use.
        return self.vxy[:vnum, :]

    zone_num[:] = 0
    self.cuda_agg_count(npint(vnum),
                        npint(self.nz),
                        In(active_vertices()),
                        InOut(zone_num),
                        block=block_dim,
                        grid=grid_dim)

    zone_leap = zone_num[:].max()
    zone_map_size = self.nz2 * zone_leap

    too_small = zone_map_size > len(self.zone_node)
    if too_small:
        print('resize, new zone leap: ', zone_map_size * 2. / self.nz2)
        self.zone_node = zeros(zone_map_size * 2, npint)

    self.zone_node[:] = 0
    zone_num[:] = 0

    self.cuda_agg(npint(vnum),
                  npint(self.nz),
                  npint(zone_leap),
                  In(active_vertices()),
                  InOut(zone_num),
                  InOut(self.zone_node),
                  Out(self.zone[:vnum]),
                  block=block_dim,
                  grid=grid_dim)

    return zone_leap, self.zone_node, zone_num
def sample(self, imsize, verbose=False):
    """Sample ``self.num * grains`` indices with the box kernel.

    The grain count comes from ``self._get_n(imsize)``. CUDA state is
    initialised first if ``self._cinit`` is falsy. ``verbose`` is
    accepted but not used here.

    Returns the length-``ng`` index array written by ``_cuda_sample_box``.
    """
    if not self._cinit:
        self.__cuda_init()

    grains = self._get_n(imsize)
    ng = self.num * grains

    ind = zeros(ng, npint)
    # Two uniform numbers are drawn per sample.
    uniform = RGEN.gen_uniform((ng, 2), npfloat)

    _cuda_sample_box(
        npint(ng),
        npint(imsize),
        uniform,
        cuda.Out(ind),
        self._s,
        self._mid,
        npint(grains),
        block=(THREADS, 1, 1),
        grid=(int(ng // THREADS + 1), 1),
    )
    return ind
def __rnn_query(self, zone_leap, zone_node, zone_num):
    """Run the ``cuda_rnn`` neighbour query against the zone map.

    The first ``self.sv_size`` entries of ``self.sv_num``, ``self.sv`` and
    ``self.dst`` are reset to 0, -5 and -10.0 respectively, then handed
    to the kernel as in/out buffers.

    Returns the three views ``(sv_num, sv, dst)``.
    """
    from pycuda.driver import In
    from pycuda.driver import InOut

    snum = self.snum
    vnum = self.vnum
    size = self.sv_size

    sv = self.sv[:size]
    sv_num = self.sv_num[:size]
    dst = self.dst[:size]

    # Reset the output buffers before the kernel fills them in.
    sv_num[:] = 0
    sv[:] = -5
    dst[:] = -10.0

    scalars = (
        npint(self.nz),
        npfloat(self.area_rad),
        npfloat(self.kill_rad),
        npint(zone_leap),
        npint(self.sv_leap),
    )
    self.cuda_rnn(
        *scalars,
        In(zone_num),
        In(zone_node),
        npint(snum),
        npint(vnum),
        In(self.smask),
        In(self.sxy),
        In(self.vxy[:vnum, :]),
        InOut(sv_num),
        InOut(sv),
        InOut(dst),
        block=(self.threads, 1, 1),
        grid=(snum // self.threads + 1, 1)
    )

    return sv_num, sv, dst
def __rnn_query(self, zone_leap, zone_node, zone_num):
    """Query neighbours with ``cuda_rnn`` and return the result buffers.

    Works on the leading ``self.sv_size`` entries of ``self.sv_num``,
    ``self.sv`` and ``self.dst``, which are first reset to 0, -5 and
    -10.0. The kernel is launched with one thread per ``self.snum``
    entry, rounded up to whole blocks plus one.

    Returns ``(sv_num, sv, dst)``.
    """
    from pycuda.driver import In, InOut

    limit = self.sv_size
    snum, vnum = self.snum, self.vnum

    sv_num = self.sv_num[:limit]
    sv = self.sv[:limit]
    dst = self.dst[:limit]

    for buf, fill in ((sv_num, 0), (sv, -5), (dst, -10.0)):
        buf[:] = fill

    launch = {
        'block': (self.threads, 1, 1),
        'grid': (snum // self.threads + 1, 1),
    }
    self.cuda_rnn(npint(self.nz),
                  npfloat(self.area_rad),
                  npfloat(self.kill_rad),
                  npint(zone_leap),
                  npint(self.sv_leap),
                  In(zone_num),
                  In(zone_node),
                  npint(snum),
                  npint(vnum),
                  In(self.smask),
                  In(self.sxy),
                  In(self.vxy[:vnum, :]),
                  InOut(sv_num),
                  InOut(sv),
                  InOut(dst),
                  **launch)

    return sv_num, sv, dst
def step(self):
    """Advance by one iteration and move the first ``self.num`` points.

    Rebuilds the zone map with ``cuda_agg``, runs ``cuda_step`` to get a
    displacement per point, then adds the displacements to ``self.xy``
    in place.
    """
    import pycuda.driver as drv

    self.itt += 1

    num = self.num
    xy = self.xy
    dxy = self.dxy
    launch = {
        'block': (self.threads, 1, 1),
        'grid': (num // self.threads + 1, 1),
    }

    # Zone counts are rebuilt from zero on every step.
    self.zone_num[:] = 0
    self.cuda_agg(
        npint(num),
        npint(self.nz),
        npint(self.zone_leap),
        drv.In(xy[:num, :]),
        drv.InOut(self.zone_num),
        drv.InOut(self.zone_node),
        **launch
    )

    self.cuda_step(
        npint(num),
        npint(self.nz),
        npint(self.zone_leap),
        drv.In(xy[:num, :]),
        drv.Out(dxy[:num, :]),
        drv.Out(self.tmp[:num, :]),
        drv.Out(self.links[:num * 10, :]),
        drv.Out(self.link_counts[:num, :]),
        drv.In(self.zone_num),
        drv.In(self.zone_node),
        npfloat(self.stp),
        npfloat(self.reject_stp),
        npfloat(self.spring_stp),
        npfloat(self.cohesion_stp),
        npfloat(self.spring_reject_rad),
        npfloat(self.spring_attract_rad),
        npint(self.max_capacity),
        npfloat(self.outer_influence_rad),
        npfloat(self.link_ignore_rad),
        **launch
    )

    xy[:num, :] += dxy[:num, :]
def step(self):
    """Perform one simulation step on the active points.

    Increments ``self.itt``, refreshes ``self.zone_num`` and
    ``self.zone_node`` through ``cuda_agg``, lets ``cuda_step`` write the
    per-point displacement into ``self.dxy`` (plus ``tmp``, ``links`` and
    ``link_counts`` outputs), and applies it to ``self.xy`` in place.
    """
    import pycuda.driver as drv

    self.itt += 1

    num = self.num
    positions = self.xy
    deltas = self.dxy

    threads_per_block = (self.threads, 1, 1)
    block_grid = (num // self.threads + 1, 1)

    self.zone_num[:] = 0
    self.cuda_agg(npint(num),
                  npint(self.nz),
                  npint(self.zone_leap),
                  drv.In(positions[:num, :]),
                  drv.InOut(self.zone_num),
                  drv.InOut(self.zone_node),
                  block=threads_per_block,
                  grid=block_grid)

    self.cuda_step(npint(num),
                   npint(self.nz),
                   npint(self.zone_leap),
                   drv.In(positions[:num, :]),
                   drv.Out(deltas[:num, :]),
                   drv.Out(self.tmp[:num, :]),
                   # The links buffer holds ten rows per point.
                   drv.Out(self.links[:num * 10, :]),
                   drv.Out(self.link_counts[:num, :]),
                   drv.In(self.zone_num),
                   drv.In(self.zone_node),
                   npfloat(self.stp),
                   npfloat(self.reject_stp),
                   npfloat(self.spring_stp),
                   npfloat(self.cohesion_stp),
                   npfloat(self.spring_reject_rad),
                   npfloat(self.spring_attract_rad),
                   npint(self.max_capacity),
                   npfloat(self.outer_influence_rad),
                   npfloat(self.link_ignore_rad),
                   block=threads_per_block,
                   grid=block_grid)

    positions[:num, :] += deltas[:num, :]
def step(self):
    """Advance the active fractures by one step.

    Increments ``self.itt``, refreshes the zone map, runs
    ``cuda_calc_stp`` for the ``self.anum`` active entries and passes the
    resulting directions to ``self._do_steps``, whose return value is
    returned unchanged.
    """
    self.itt += 1

    num, fnum, anum = self.num, self.fnum, self.anum

    # Views over the parts of the buffers that are currently in use.
    xy = self.xy[:num, :]
    visited = self.visited[:num, 0]
    active = self.active[:anum]
    fid_node = self.fid_node[:fnum]
    dxy = self.dxy[:fnum, :]
    new_dxy = self.new_dxy[:anum, :]

    self.update_zone_map()

    # Sentinel values the kernel is expected to overwrite.
    tmp = self.tmp[:anum, :]
    tmp[:, :] = -1
    new_dxy[:, :] = -10

    # The grid dimension is cast to a plain Python int; a numpy int is
    # reportedly not accepted here.
    n_blocks = int(anum // self.threads + 1)

    self.cuda_calc_stp(
        npint(self.nz),
        npint(self.zone_leap),
        npint(num),
        npint(fnum),
        npint(anum),
        npfloat(self.frac_dot),
        npfloat(self.frac_dst),
        npfloat(self.frac_stp),
        npint(self.ignore_fracture_sources),
        drv.In(visited),
        drv.In(fid_node),
        drv.In(active),
        drv.InOut(tmp),
        drv.In(xy),
        drv.In(dxy),
        drv.Out(new_dxy),
        drv.In(self.zone_num),
        drv.In(self.zone_node),
        block=(self.threads, 1, 1),
        grid=(n_blocks, 1),
    )

    return self._do_steps(active, new_dxy)
def map_angle(ang, range=None, units=None):
    """Map angles into a single period.

    Parameters
    ----------
    ang : scalar or array-like
        Angle(s) to map; converted with ``npfloat`` and promoted to at
        least one dimension.
    range : array-like, optional
        Target interval. Its minimum and maximum are used as lower and
        upper bound and must be exactly one period apart. When omitted,
        angles are mapped to ``[-period/2, period/2)``.
    units : optional
        Passed to ``cnst.period_for_unit`` to obtain the period.

    Returns
    -------
    numpy.ndarray
        The mapped angles (always at least 1-d).

    Raises
    ------
    RuntimeError
        If ``range`` does not span exactly one period.
    """
    period = cnst.period_for_unit(units)

    ang = np.atleast_1d(npfloat(ang))

    if range is None:
        return np.mod(ang + 0.5 * period, period) - 0.5 * period

    # Fix: the bounds come from the ``range`` parameter. The previous code
    # read ``args[0]``, a name that does not exist in this function, so
    # every call with an explicit range raised NameError.
    ang_range = np.atleast_1d(npfloat(range))

    # Strip whole multiples of the period (truncating toward zero).
    ang = ang - npint(ang / period) * period

    lb = ang_range.min()
    ub = ang_range.max()
    if abs(ub - lb) != period:
        raise RuntimeError('range is incomplete!')

    # Shift values below the lower bound up, one period at a time.
    below = ang < lb
    while below.any():
        ang[below] = ang[below] + period
        below = ang < lb

    # Shift values above the upper bound down, one period at a time.
    above = ang > ub
    while above.any():
        ang[above] = ang[above] - period
        above = ang > ub

    # NOTE: this is not quite ``np.mod(ang - lb, period) + lb``: a value
    # equal to ``ub`` is kept here, whereas the mod form would map it to
    # ``lb``.
    return ang
def __growth(
    self,
    zone_leap,
    zone_num,
    zone_node,
    vs_map,
    vs_ind,
    vs_counts
):
    """Grow new vertices from the existing ones.

    ``cuda_growth`` writes a growth vector per existing vertex into
    ``self.vec`` (pre-filled with -99.0). Every vertex whose first
    component is not below -3.0 gets a new child vertex appended: the
    child's position is the growth vector, an edge and a parent link are
    recorded, and ``self.enum`` / ``self.vnum`` are advanced.

    A child of a vertex that already had descendants gets generation
    ``gen[parent] + 1``; a first child inherits the parent's generation.

    Returns True when no vertex was added (nothing left to grow),
    otherwise False.
    """
    from pycuda.driver import In
    from pycuda.driver import InOut

    vnum = self.vnum
    enum = self.enum

    has_descendants = self.has_descendants
    gen = self.gen

    vec = self.vec[:vnum, :]

    stp = self.stp
    kill_rad = self.kill_rad

    edges = self.edges
    parent = self.parent

    sxy = self.sxy
    vxy = self.vxy

    # Sentinel: rows the kernel leaves untouched stay far below -3.0.
    vec[:, :] = -99.0

    self.cuda_growth(
        npint(self.nz),
        npfloat(kill_rad),
        npfloat(stp),
        npint(zone_leap),
        In(zone_num),
        In(zone_node),
        In(vs_map),
        In(vs_ind),
        In(vs_counts),
        In(sxy),
        In(vxy[:vnum, :]),
        npint(vnum),
        InOut(vec),
        block=(self.threads, 1, 1),
        grid=(vnum // self.threads + 1, 1)
    )

    abort = True

    # Fix: ``range`` instead of ``xrange``, which does not exist on
    # Python 3. The bound is evaluated once, so only vertices that existed
    # before this call are visited even though ``vnum`` grows in the loop.
    for i in range(vnum):
        gv = vec[i, :]

        if gv[0] < -3.0:
            # No growth vector was produced for this vertex.
            continue

        if has_descendants[i]:
            gen[vnum] = gen[i] + 1
        else:
            gen[vnum] = gen[i]
            has_descendants[i] = True

        edges[enum, :] = [i, vnum]
        parent[vnum] = i
        vxy[vnum, :] = gv
        abort = False

        enum += 1
        vnum += 1

    self.enum = enum
    self.vnum = vnum

    return abort
def frac_front(self, factor, angle, dbg=False):
    """Try to branch new fractures off a random subset of active ones.

    Each of the ``self.anum`` active fractures is selected when a uniform
    random draw falls below ``factor``. For every selected fracture a
    candidate direction is built by turning its current direction by
    +/- ``HPI`` plus a random offset within ``angle``. ``cuda_calc_stp``
    evaluates the candidates, and those whose first output component is
    at least -1.0 are added through ``self._add_fracs``.

    Returns the number of fractures added (0 when none).
    """
    picked = (random(self.anum) < factor).nonzero()[0]
    n = len(picked)
    if n < 1:
        return 0

    cand_aa = self.active[picked, 0]
    cand_ii = self.fid_node[cand_aa, 1]

    num, fnum, anum = self.num, self.fnum, self.anum

    xy = self.xy[:num, :]
    visited = self.visited[:num, 0]
    new_ids = arange(fnum, fnum + n)
    orig_dxy = self.dxy[cand_aa, :]

    # Random side (+1 or -1) first, then the jitter, so the random draws
    # happen in this order.
    side = (-1) ** randint(2, size=n)
    jitter = (0.5 - random(n)) * angle
    theta = arctan2(orig_dxy[:, 1], orig_dxy[:, 0]) + (side * HPI + jitter)

    fid_node = column_stack((new_ids, cand_ii))
    cand_dxy = column_stack((cos(theta), sin(theta)))
    nactive = arange(n)
    tmp_dxy = self.tmp_dxy[:n, :]

    self.update_zone_map()

    # The grid dimension is cast to a plain Python int; a numpy int is
    # reportedly not accepted here.
    n_blocks = int(n // self.threads + 1)

    self.cuda_calc_stp(
        npint(self.nz),
        npint(self.zone_leap),
        npint(num),
        npint(n),
        npint(n),
        npfloat(self.frac_dot),
        npfloat(self.frac_dst),
        npfloat(self.frac_stp),
        npint(self.ignore_fracture_sources),
        drv.In(visited),
        drv.In(fid_node),
        drv.In(nactive),
        drv.Out(self.tmp[:n, :]),
        drv.In(xy),
        drv.In(cand_dxy),
        drv.Out(tmp_dxy),
        drv.In(self.zone_num),
        drv.In(self.zone_node),
        block=(self.threads, 1, 1),
        grid=(n_blocks, 1),
    )

    accepted = tmp_dxy[:, 0] >= -1.0
    n = accepted.sum()
    if n < 1:
        return 0

    self._add_fracs(cand_dxy[accepted, :], cand_ii[accepted])

    if dbg:
        self.print_debug(num, fnum, anum, meta='new: {:d}'.format(n))

    return n
def step(self, t=None):
    """Advance by one iteration, sizing the zone map on the fly.

    A counting pass determines the zone leap (largest zone count);
    ``self.zone_node`` is reallocated at twice the needed size when too
    small. The zone map is then filled, ``cuda_step`` computes the
    displacements and they are added to ``self.xy`` in place. ``t`` is
    accepted but not used here.
    """
    import pycuda.driver as drv

    self.itt += 1

    num = self.num
    xy = self.xy
    dxy = self.dxy
    launch = {
        'block': (self.threads, 1, 1),
        'grid': (num // self.threads + 1, 1),
    }

    # Pass 1: count per zone.
    self.zone_num[:] = 0
    self.cuda_agg_count(
        npint(num),
        npint(self.nz),
        drv.In(xy[:num, :]),
        drv.InOut(self.zone_num),
        **launch
    )

    zone_leap = self.zone_num[:].max()
    needed = self.nz2 * zone_leap
    if needed > len(self.zone_node):
        # Grow to twice the required size to leave headroom.
        print('resize, new zone leap: ', needed * 2. / self.nz2)
        self.zone_node = zeros(needed * 2, npint)

    # Pass 2: fill the zone map.
    self.zone_num[:] = 0
    self.cuda_agg(
        npint(num),
        npint(self.nz),
        npint(zone_leap),
        drv.In(xy[:num, :]),
        drv.InOut(self.zone_num),
        drv.InOut(self.zone_node),
        **launch
    )

    self.cuda_step(
        npint(num),
        npint(self.nz),
        npint(zone_leap),
        drv.In(xy[:num, :]),
        drv.Out(dxy[:num, :]),
        drv.Out(self.tmp[:num, :]),
        drv.Out(self.link_len[:num, :]),
        drv.Out(self.link_curv[:num, :]),
        drv.In(self.links[:num, :]),
        drv.In(self.zone_num),
        drv.In(self.zone_node),
        npfloat(self.stp),
        npfloat(self.reject_stp),
        npfloat(self.spring_stp),
        npfloat(self.near_rad),
        npfloat(self.far_rad),
        **launch
    )

    xy[:num, :] += dxy[:num, :]
def __growth(self, zone_leap, zone_num, zone_node, vs_map, vs_ind,
             vs_counts):
    """Append a child vertex for every vertex that received a growth vector.

    ``self.vec`` is pre-filled with -99.0 and passed to ``cuda_growth``.
    Rows whose first component is still below -3.0 afterwards are
    skipped. For each remaining row a new vertex is stored at index
    ``vnum`` with its edge, parent and generation, and the counters
    ``self.enum`` / ``self.vnum`` are updated at the end.

    Returns True if nothing was added, False otherwise.
    """
    from pycuda.driver import In, InOut

    vnum = self.vnum
    enum = self.enum

    gen = self.gen
    has_descendants = self.has_descendants
    edges = self.edges
    parent = self.parent
    vxy = self.vxy

    # One growth vector per vertex that exists at the start of the call.
    vec = self.vec[:vnum, :]
    vec[:, :] = -99.0

    self.cuda_growth(npint(self.nz),
                     npfloat(self.kill_rad),
                     npfloat(self.stp),
                     npint(zone_leap),
                     In(zone_num),
                     In(zone_node),
                     In(vs_map),
                     In(vs_ind),
                     In(vs_counts),
                     In(self.sxy),
                     In(vxy[:vnum, :]),
                     npint(vnum),
                     InOut(vec),
                     block=(self.threads, 1, 1),
                     grid=(vnum // self.threads + 1, 1))

    abort = True
    for i, gv in enumerate(vec):
        if gv[0] < -3.0:
            continue

        if not has_descendants[i]:
            # First child keeps the parent's generation.
            gen[vnum] = gen[i]
            has_descendants[i] = True
        else:
            gen[vnum] = gen[i] + 1

        edges[enum, :] = [i, vnum]
        parent[vnum] = i
        vxy[vnum, :] = gv

        abort = False
        enum += 1
        vnum += 1

    self.enum = enum
    self.vnum = vnum
    return abort
def step(self, t=None):
    """Run one iteration: count zones, fill the zone map, move the points.

    The largest count from ``cuda_agg_count`` is used as the zone leap;
    ``self.zone_node`` is replaced by a zeroed array of twice the
    required size if it is too short. After ``cuda_agg`` and
    ``cuda_step`` the computed displacements are added to the first
    ``self.num`` rows of ``self.xy``. ``t`` is accepted but not used here.
    """
    import pycuda.driver as drv

    self.itt += 1

    num = self.num
    positions = self.xy
    deltas = self.dxy
    tmp = self.tmp
    link_len = self.link_len
    link_curv = self.link_curv

    threads_per_block = (self.threads, 1, 1)
    block_grid = (num // self.threads + 1, 1)

    self.zone_num[:] = 0
    self.cuda_agg_count(npint(num),
                        npint(self.nz),
                        drv.In(positions[:num, :]),
                        drv.InOut(self.zone_num),
                        block=threads_per_block,
                        grid=block_grid)

    zone_leap = self.zone_num[:].max()
    zone_map_size = self.nz2 * zone_leap

    too_small = zone_map_size > len(self.zone_node)
    if too_small:
        print('resize, new zone leap: ', zone_map_size * 2. / self.nz2)
        self.zone_node = zeros(zone_map_size * 2, npint)

    # Counts are reset because the fill pass accumulates them again.
    self.zone_num[:] = 0
    self.cuda_agg(npint(num),
                  npint(self.nz),
                  npint(zone_leap),
                  drv.In(positions[:num, :]),
                  drv.InOut(self.zone_num),
                  drv.InOut(self.zone_node),
                  block=threads_per_block,
                  grid=block_grid)

    self.cuda_step(npint(num),
                   npint(self.nz),
                   npint(zone_leap),
                   drv.In(positions[:num, :]),
                   drv.Out(deltas[:num, :]),
                   drv.Out(tmp[:num, :]),
                   drv.Out(link_len[:num, :]),
                   drv.Out(link_curv[:num, :]),
                   drv.In(self.links[:num, :]),
                   drv.In(self.zone_num),
                   drv.In(self.zone_node),
                   npfloat(self.stp),
                   npfloat(self.reject_stp),
                   npfloat(self.spring_stp),
                   npfloat(self.near_rad),
                   npfloat(self.far_rad),
                   block=threads_per_block,
                   grid=block_grid)

    positions[:num, :] += deltas[:num, :]