def generate_roadmap_parallel(samples, env, max_dist, leafsize, knn):
    """Parallelized roadmap generator """
    n_sample = len(samples)
    leafsize = knn
    if len(samples) < leafsize:
        leafsize = len(samples) - 1

    import sharedmem
    sample_ids = np.arange(n_sample, dtype='i')
    roadmap = sharedmem.full((n_sample, knn), 0)

    # Start multi processing over samples
    with sharedmem.MapReduce() as pool:
        if n_sample % sharedmem.cpu_count() == 0:
            chunksize = n_sample / sharedmem.cpu_count()
        else:
            chunksize = n_sample / sharedmem.cpu_count() + 1

        def work(i):
            skdtree = KDTree(samples, leafsize=leafsize)
            sub_sample_ids = sample_ids[slice(i, i + chunksize)]

            for j, sub_sample_id in enumerate(sub_sample_ids):
                x = samples[sub_sample_id]
                try:
                    inds, dists = skdtree.search(x, k=leafsize)
                except:
                    print "skdtree search failed"
                    sys.exit()

                edge_id = []
                append = edge_id.append
                for ii, (ind, dist) in enumerate(zip(inds, dists)):
                    if dist > max_dist: break  # undirected
                    if len(edge_id) >= knn: break  # directed?
                    append(ind)

                # to complement fewer number of edges for vectorized valueiteration
                if len(edge_id) < knn:
                    for ii in range(0, len(inds)):
                        #for ind in edge_id:
                        #    edge_id.append(ind)
                        #    if len(edge_id) >= knn: break
                        append(inds[0])
                        if len(edge_id) >= knn: break

                assert len(edge_id) <= leafsize, \
                    "fewer leaves than edges {} (dists={})".format(
                        len(edge_id), dists[:len(edge_id)])

                for k in range(len(edge_id)):
                    roadmap[sub_sample_id][k] = edge_id[k]

        pool.map(work, range(0, n_sample, chunksize))  #, reduce=reduce)

    # convert sharedmem array to list
    roadmap = np.array(roadmap).astype(int)
    skdtree = None  #KDTree(samples, leafsize=leafsize)
    return roadmap.tolist(), skdtree
def argsort(ar):
    min = minimum.reduce(ar)
    max = maximum.reduce(ar)
    nchunk = sharedmem.cpu_count() * 2
    #bins = numpy.linspace(min, max, nchunk, endpoint=True)
    step = 1.0 * (max - min) / nchunk
    bins = numpy.array(
        1.0 * numpy.arange(nchunk + 1) * (max - min) / nchunk + min,
        min.dtype)
    dig = digitize(ar, bins)
    binlength = bincount(dig, minlength=len(bins) + 1)
    binoffset = numpy.cumsum(binlength)
    out = sharedmem.empty(len(ar), dtype='intp')

    with sharedmem.MapReduce() as pool:
        def work(i):
            # we can do this a lot faster
            # but already having pretty good speed.
            ind = numpy.nonzero(dig == i + 1)[0]
            myar = ar[ind]
            out[binoffset[i]:binoffset[i + 1]] = ind[myar.argsort()]
        pool.map(work, range(nchunk))
    return out
def __init__(self, filename):
    self.filename = filename
    self.fileObject = h5py.File(self.filename)
    self.dset = self.fileObject['channel_data']
    self.isSnapshot = self.fileObject.attrs['ph_flags'][0] & (1 << 6)
    self.nsamples, self.nchan = self.dset.shape
    self.time_ms = np.arange(self.nsamples) * MS_PER_SEC / SAMPLE_RATE
    self.timeMin = np.min(self.time_ms)
    self.timeMax = np.max(self.time_ms)
    self.boardID = self.fileObject.attrs['board_id'][0]
    if self.isSnapshot:
        self.cookie = None
    else:
        self.cookie = self.fileObject.attrs['experiment_cookie'][0]
    chipAliveMask = self.fileObject['chip_live'][0]
    self.chipList = [i for i in range(NCHIPS) if (chipAliveMask & (0x1 << i))]
    self.isImported = False
    self.sliceImported = False
    self.sliceBeenFiltered = False
    self.ncpu = sharedmem.cpu_count()
    self.slice_nsamples = 0
    self.slice_nchans = None
def fstack_mp(img, fmap):
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')

    # This implementation is faster than breaking each image plane up for parallel processing
    def do_work(x):
        index = ne.evaluate("fmap==x")
        img_stacked[index] = img[:, :, x][index]
        index = ne.evaluate("(fmap > x) & (fmap < x+1)")
        A = fmap[index]
        B = img[:, :, x+1][index]
        C = img[:, :, x][index]
        img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")

    with shmem.MapReduce(np=img.shape[2]-1) as pool:
        pool.map(do_work, range(img.shape[2]-1))

    last_ind = img.shape[2]-1
    index = ne.evaluate("fmap == last_ind")
    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)

    def mp_assignment(x):
        img_stacked[edges[x]:edges[x+1], :][index[edges[x]:edges[x+1], :]] = \
            img[edges[x]:edges[x+1], :, -1][index[edges[x]:edges[x+1], :]]

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))
    return img_stacked
def sharedmem_pool(total_cores, numexpr=True):
    # see https://stackoverflow.com/questions/15639779
    global AFFINITY_FLAG
    if not AFFINITY_FLAG:
        AFFINITY_FLAG = True
        os.system("taskset -p 0xfff %d" % os.getpid())
    if total_cores is None:
        total_cores = sm.cpu_count()
    return sm.MapReduce(np=good_process_number(total_cores, numexpr))
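# A minimal, hypothetical usage sketch for sharedmem_pool() above: it assumes `sm`
# is `import sharedmem as sm` and that good_process_number() simply caps the worker
# count. The worker squares a shared array in place.
import numpy as np
import sharedmem as sm

data = np.arange(1000, dtype='f8')
result = sm.empty(1000, dtype='f8')

with sharedmem_pool(total_cores=4) as pool:
    def work(i):
        result[i] = data[i] ** 2   # writes go straight into shared memory
    pool.map(work, range(len(data)))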
def regularizing_objective_function(parameter, bundle):  # pragma: no cover
    # attach the guess for tau to each of the voxels in the bundle
    for voxel in bundle:
        model = voxel[1]
        model.parameter = parameter

    # fit each of the voxels
    num_cpus = sharedmem.cpu_count() - 1
    with sharedmem.Pool(np=num_cpus) as pool:
        output = pool.map(parallel_fit, bundle)

    return output
def get_features_from_states(env, states, feature_fn):
    import sharedmem
    n_states = len(states)
    feat_len = len(feature_fn(env, states[0]))
    state_ids = np.arange(n_states, dtype='i')
    features = sharedmem.full((n_states, feat_len), 0.)

    # Start multi processing over support states
    with sharedmem.MapReduce() as pool:
        if n_states % sharedmem.cpu_count() == 0:
            chunksize = n_states / sharedmem.cpu_count()
        else:
            chunksize = n_states / sharedmem.cpu_count() + 1

        def work(i):
            s_ids = state_ids[slice(i, i + chunksize)]
            for j, s_id in enumerate(s_ids):
                s = states[s_id]  # state id in states
                features[s_id] = feature_fn(env, s)

        pool.map(work, range(0, n_states, chunksize))  #, reduce=reduce)

    return np.array(features)
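# Several snippets in this collection (generate_roadmap_parallel above, this one, and
# the MDP solvers below) share one idiom: compute a per-core chunk size, let each
# worker fill its own slice of a sharedmem array in place, and read the array back in
# the parent. A distilled, hypothetical sketch of that pattern with a placeholder
# expensive(i) worker:
import numpy as np
import sharedmem

def parallel_fill(n, expensive):
    """Fill out[i] = expensive(i) for i in range(n), one chunk of indices per worker."""
    out = sharedmem.full(n, 0.)
    with sharedmem.MapReduce() as pool:
        # ceil(n / cpu_count()), the same chunking rule the snippets above use
        chunksize = (n + sharedmem.cpu_count() - 1) // sharedmem.cpu_count()

        def work(start):
            for i in range(start, min(start + chunksize, n)):
                out[i] = expensive(i)   # each worker touches only its own slice

        pool.map(work, range(0, n, chunksize))
    return np.array(out)

# e.g. parallel_fill(1000, lambda i: i ** 2)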
def get_fmap(img):
    num_proc = shmem.cpu_count()
    log_kernel = get_log_kernel(11, 2)
    se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))

    def mp_imgproc(x):
        bound_in = (edges[x] - (edges[x] > 0) * 50,
                    edges[x+1] + (edges[x+1] < img.shape[0]) * 50)
        bound_out = (50 if edges[x] > 0 else 0,
                     None if edges[x+1] == img.shape[0] else -50)
        part_img = cv2.filter2D(img[bound_in[0]:bound_in[1], :, ii].astype('single'), -1, log_kernel)
        part_img = cv2.dilate(part_img, se)
        img_filtered[edges[x]:edges[x+1], :] = part_img[bound_out[0]:bound_out[1], :]

    def mp_gaussblur(x):
        bound_in = (edges[x] - (edges[x] > 0) * 50,
                    edges[x+1] + (edges[x+1] < img.shape[0]) * 50)
        bound_out = (50 if edges[x] > 0 else 0,
                     None if edges[x+1] == img.shape[0] else -50)
        part_img = cv2.GaussianBlur(fmap[bound_in[0]:bound_in[1], :], (31, 31), 6)
        fmap[edges[x]:edges[x+1], :] = part_img[bound_out[0]:bound_out[1], :]

    log_response = shmem.empty(img.shape[0:2], dtype='single')
    fmap = shmem.empty(img.shape[0:2], dtype='single')
    edges = get_edges(img, num_proc)

    def mp_assignment_1(x):
        log_response[edges[x]:edges[x+1], :] = img_filtered[edges[x]:edges[x+1], :]

    def mp_assignment_2(x):
        fmap[index[edges[x]:edges[x+1], :]] = ii

    for ii in range(img.shape[2]):
        img_filtered = shmem.empty((img.shape[0], img.shape[1]), dtype='single')
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_imgproc, range(num_proc))
        index = ne.evaluate("img_filtered > log_response")
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_assignment_1, range(num_proc))
        # log_response[index] = img_filtered[index]
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_assignment_2, range(num_proc))

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_gaussblur, range(num_proc))
    return fmap
def fstack_mp_new(img, fmap):
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')
    indexl = shmem.empty(img.shape[0:2], dtype='bool')
    edges = get_edges(img, 16)

    # This implementation is faster than breaking each image plane up for parallel processing
    def do_work(x):
        if x != img.shape[2]-1:
            def mt_assignment(input, y):
                return input[index[edges[y]:edges[y+1], :]]
            index = ne.evaluate("fmap==x")
            img_stacked[index] = img[:, :, x][index]
            index = ne.evaluate("(fmap > x) & (fmap < x+1)")
            with ThreadPoolExecutor(max_workers=16) as pool:
                A = np.concatenate([(pool.submit(mt_assignment, fmap, y)).result()
                                    for y in range(16)], axis=0)
                B = np.concatenate([(pool.submit(mt_assignment, img[:, :, x+1], y)).result()
                                    for y in range(16)], axis=0)
                C = np.concatenate([(pool.submit(mt_assignment, img[:, :, x], y)).result()
                                    for y in range(16)], axis=0)
            print('A Shape is : ', A.shape)
            print('A content is: ', A)
            img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")
        else:
            last_ind = img.shape[2]-1
            # write in place so the shared mask is visible to the parent process
            indexl[:] = ne.evaluate("fmap == last_ind")

    with shmem.MapReduce(np=img.shape[2]) as pool:
        pool.map(do_work, range(img.shape[2]))

    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)

    def mp_assignment(x):
        img_stacked[edges[x]:edges[x+1], :][indexl[edges[x]:edges[x+1], :]] = \
            img[edges[x]:edges[x+1], :, -1][indexl[edges[x]:edges[x+1], :]]

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))
    return img_stacked
def cache(self, grids, ncpus=1, Ns=None):  # pragma: no cover
    # get parameter space
    if isinstance(grids[0], SliceType):
        params = [list(np.arange(g.start, g.stop, g.step)) for g in grids]
    else:
        params = [list(np.linspace(g[0], g[1], Ns)) for g in grids]

    # make combos
    combos = [c for c in itertools.product(*params)]

    def mini_predictor(combo):
        print(combo)
        return self.generate_ballpark_prediction(*combo)

    # compute predictions
    num_cpus = sharedmem.cpu_count() - 1
    with sharedmem.Pool(np=num_cpus) as pool:
        # map the local helper defined above (not a bound method)
        models = pool.map(mini_predictor, combos)

    # turn into array
    return models
hubble = header.hubble
chunksize = args.chunksize
#ar.ellipsoid.ne.set_num_threads(args.nthreads)
mass = [args.minmass, args.maxmass]
if args.test:
    mass = [10, 10.5]
    chunksize = 5
binwidth = args.binwidth

print 'Directory: ', fdir
print 'Snapshot: ', snap
print 'Boxsize: ', boxsize
print 'Hubble parameter: ', hubble
print 'Halo mass range for calculation: 10^' + str(mass), 'M_sun'
print 'Number of cores for sharedmem: ', sharedmem.cpu_count()
print 'Number of threads for numexpr: ', ar.ellipsoid.numba.config.NUMBA_NUM_THREADS
print ' '

#nbins=18; a=ar.AxialRatio(fdir,snap,nbins,rmin=10**-2.42857143,useSubhaloes=args.subhaloes)
nbins = 2
a = ar.AxialRatio(fdir, snap, 0, [1., 2.], useStellarhalfmassRad=True,
                  useSubhaloID=True, binwidth=binwidth)
submass = a.cat.SubhaloMassInRadType[:, 4] / hubble * 1e10
subhalos = np.nonzero((submass > 10**mass[0]) & (submass < 10**mass[1]))[0]
nsubs = a.cat.nsubs
ngroups = a.cat.ngroups
def computePolicies(mdp, goal_states, error=1e-10):
    """Compute Q using multi-process """
    # initialization of variables
    n_goal_states = len(goal_states)
    n_actions, n_states = mdp.n_actions, mdp.n_states
    roadmap = mdp.roadmap
    states = mdp.states
    gamma = mdp.gamma
    T = mdp.T
    rewards = mdp.get_rewards()
    #from IPython import embed; embed(); sys.exit()
    if rewards is None:
        rewards = np.zeros(len(mdp.states))
    else:
        rewards = np.array(rewards)
        rewards[np.where(rewards > 0)] = 0.

    goal_state_ids = np.arange(n_goal_states, dtype='i')
    goal_policy_mat = sharedmem.full((n_goal_states, mdp.n_states, mdp.n_actions), 0.)
    ## goal_values = sharedmem.full((n_goal_states, mdp.n_states), 0.)
    ## goal_validity = sharedmem.full((n_goal_states), True)

    # Start multi processing over goal states
    with sharedmem.MapReduce() as pool:
        if n_goal_states % sharedmem.cpu_count() == 0:
            chunksize = n_goal_states / sharedmem.cpu_count()
        else:
            chunksize = n_goal_states / sharedmem.cpu_count() + 1

        def work(i):
            state_ids = goal_state_ids[slice(i, i + chunksize)]  # 0,1,2,3
            new_rewards = copy.copy(rewards)
            values = np.zeros(n_states)

            for j, goal_state_id in enumerate(state_ids):
                s = goal_states[goal_state_id]  # state id in states
                ## s_idx = states.index(s)

                # vi agent
                mdp = vi.valueIterAgent(n_actions, n_states,
                                        roadmap, None, states,
                                        gamma=gamma, T=T)
                mdp.set_goal(s)

                if new_rewards[s] >= 0.:
                    new_rewards[s] = 1.
                mdp.set_rewards(new_rewards)

                # Store q_mat and validity mat per state
                policy, _ = mdp.find_policy(error)
                goal_policy_mat[goal_state_id] = policy

                # find all states that gives f_g in states
                for k, gs in enumerate(goal_states):
                    if s == gs:
                        goal_policy_mat[k] = policy

                # reset
                new_rewards[s] = 0.

        pool.map(work, range(0, n_goal_states, chunksize))  #, reduce=reduce)

    # convert sharedmem array to list
    policies = []
    for i in range(n_goal_states):
        policies.append(np.array(goal_policy_mat[i]))

    return policies
def test_parallel_fit_manual_grids():
    # stimulus features
    viewing_distance = 38
    screen_width = 25
    thetas = np.arange(0, 360, 45)
    num_blank_steps = 0
    num_bar_steps = 30
    ecc = 10
    tr_length = 1.0
    frames_per_tr = 1.0
    scale_factor = 0.10
    pixels_down = 100
    pixels_across = 100
    dtype = ctypes.c_int16
    voxel_index = (1, 2, 3)
    auto_fit = True
    verbose = 1

    # create the sweeping bar stimulus in memory
    bar = simulate_bar_stimulus(pixels_across, pixels_down, viewing_distance,
                                screen_width, thetas, num_bar_steps,
                                num_blank_steps, ecc)

    # create an instance of the Stimulus class
    stimulus = VisualStimulus(bar, viewing_distance, screen_width,
                              scale_factor, tr_length, dtype)

    # initialize the gaussian model
    model = og.GaussianModel(stimulus, utils.double_gamma_hrf)
    model.hrf_delay = 0

    # generate a random pRF estimate
    x = -5.24
    y = 2.58
    sigma = 1.24
    beta = 2.5
    baseline = -0.25

    # create the "data"
    data = model.generate_prediction(x, y, sigma, beta, baseline)

    # set search grid
    x_grid = slice(-5, 4, 5)
    y_grid = slice(-5, 7, 5)
    s_grid = slice(1 / stimulus.ppd, 5.25, 5)
    b_grid = slice(0.1, 4.0, 5)

    # set search bounds
    x_bound = (-12.0, 12.0)
    y_bound = (-12.0, 12.0)
    s_bound = (1 / stimulus.ppd, 12.0)
    b_bound = (1e-8, 1e2)
    m_bound = (None, None)

    # loop over each voxel and set up a GaussianFit object
    grids = (x_grid, y_grid, s_grid,)
    bounds = (x_bound, y_bound, s_bound, b_bound, m_bound)

    # make 3 voxels
    all_data = np.array([data, data, data])
    num_voxels = data.shape[0]
    indices = [(1, 2, 3)] * 3

    # bundle the voxels
    bundle = utils.multiprocess_bundle(og.GaussianFit, model, all_data,
                                       grids, bounds, indices)

    # run analysis
    with sharedmem.Pool(np=sharedmem.cpu_count() - 1) as pool:
        output = pool.map(utils.parallel_fit, bundle)

    # assert equivalence
    for fit in output:
        npt.assert_almost_equal(fit.x, x, 2)
        npt.assert_almost_equal(fit.y, y, 2)
        npt.assert_almost_equal(fit.sigma, sigma, 2)
        npt.assert_almost_equal(fit.beta, beta, 2)
        npt.assert_almost_equal(fit.baseline, baseline, 2)
def computeQ(mdp, support_states, error=1e-10,
             support_features=None, support_feature_state_dict=None,
             cstr_fn=None, add_no_cstr=True, max_cnt=100, **kwargs):
    """Compute Q using multi-process """
    # initialization of variables
    n_support_states = len(support_states)
    n_actions, n_states = mdp.n_actions, mdp.n_states
    eps = np.finfo(float).eps
    roadmap = mdp.roadmap
    states = mdp.states
    gamma = mdp.gamma
    T = mdp.T
    rewards = mdp.get_rewards()
    #from IPython import embed; embed(); sys.exit()
    if rewards is None:
        rewards = np.zeros(len(mdp.states))
    else:
        rewards = np.array(rewards)
        rewards[np.where(rewards > 0)] = 0.

    support_state_ids = np.arange(n_support_states, dtype='i')
    if support_features is not None:
        support_feature_ids, support_feature_values = support_features
        computed_f_id = sharedmem.full(len(support_feature_values), False, dtype='b')
    else:
        return NotImplementedError

    if cstr_fn is None:
        support_q_mat = sharedmem.full((n_support_states, mdp.n_states, mdp.n_actions), 0.)
        support_values = sharedmem.full((n_support_states, mdp.n_states), 0.)
        support_validity = sharedmem.full((n_support_states), True)
    else:
        if add_no_cstr:
            n_cstr_fn = len(cstr_fn) + 1
        else:
            n_cstr_fn = len(cstr_fn)
        support_q_mat = sharedmem.full((n_support_states, n_cstr_fn, mdp.n_states, mdp.n_actions), 0.)
        support_values = sharedmem.full((n_support_states, n_cstr_fn, mdp.n_states), 0.)
        support_validity = sharedmem.full((n_support_states, n_cstr_fn), True)

        if len(cstr_fn) > 0:
            feat_map = kwargs['feat_map']
            roadmap = kwargs['roadmap']
            states = mdp.states
            cstr_T = []
            for i in range(len(cstr_fn)):
                validity_map = cstr_fn[i](None, f=feat_map)[roadmap]
                validity_map[:, 0] = True
                Tc = mdp.T * validity_map[:, np.newaxis, :]
                Tc[:, :, 0] = eps
                sum_T = np.sum(Tc, axis=-1)
                Tc /= sum_T[:, :, np.newaxis]
                cstr_T.append(Tc)

    # Start multi processing over support states
    with sharedmem.MapReduce() as pool:
        if n_support_states % sharedmem.cpu_count() == 0:
            chunksize = n_support_states / sharedmem.cpu_count()
        else:
            chunksize = n_support_states / sharedmem.cpu_count() + 1

        def work(i):
            state_ids = support_state_ids[slice(i, i + chunksize)]
            new_rewards = copy.copy(rewards)
            values = np.zeros(n_states)

            for j, state_id in enumerate(state_ids):
                s = support_states[state_id]  # state id in states

                # vi agent
                mdp = vi.valueIterAgent(n_actions, n_states,
                                        roadmap, None, states,
                                        gamma=gamma, T=T)
                mdp.set_goal(s)

                if support_feature_ids is None:
                    if new_rewards[s] >= 0.:
                        new_rewards[s] = 1.
                else:
                    # find all states that gives f_g in states
                    f_id = support_feature_ids[state_id]
                    goal_state_ids = support_feature_state_dict[f_id]
                    if computed_f_id[f_id]:
                        continue
                    else:
                        computed_f_id[f_id] = True
                    new_rewards[goal_state_ids] = 1.

                mdp.set_rewards(new_rewards)

                # Store q_mat and validity mat per state
                if cstr_fn is not None:
                    for k in range(len(cstr_fn)):
                        # check if the goal is isolated
                        if np.sum(cstr_T[k][goal_state_ids]) > 0.:
                            values, param_dict = mdp.solve_mdp(error,
                                                               init_values=values,
                                                               T=cstr_T[k],
                                                               max_cnt=max_cnt,
                                                               goal=s,
                                                               return_params=True)
                            support_q_mat[state_id][k] = param_dict['q']
                            support_validity[state_id][k] = cstr_fn[k](s)
                            support_values[state_id][k] = values
                    if add_no_cstr:
                        values, param_dict = mdp.solve_mdp(error,
                                                           init_values=values,
                                                           T=T,
                                                           max_cnt=max_cnt,
                                                           ## goal=s,
                                                           return_params=True)
                        support_q_mat[state_id][-1] = param_dict['q']
                        support_validity[state_id][-1] = True
                        support_values[state_id][-1] = values
                else:
                    values, param_dict = mdp.solve_mdp(error,
                                                       init_values=values,
                                                       max_cnt=max_cnt,
                                                       return_params=True)
                    support_q_mat[state_id] = param_dict['q']
                    support_values[state_id] = values

                # find all states that gives f_g in states
                for gs in goal_state_ids:
                    k = support_states.index(gs)
                    if k != state_id:
                        support_q_mat[k] = support_q_mat[state_id]

                # reset
                ## new_rewards = copy.copy(rewards)
                if support_feature_ids is None:
                    new_rewards[s] = 0.
                else:
                    new_rewards[goal_state_ids] = 0.

        pool.map(work, range(0, n_support_states, chunksize))  #, reduce=reduce)

    # convert sharedmem array to dict
    support_q_mat_dict = {}
    support_values_dict = {}
    support_validity_dict = {}
    for i, s in enumerate(support_states):
        support_q_mat_dict[s] = np.array(support_q_mat[i])
        support_values_dict[s] = np.array(support_values[i])
        if cstr_fn is not None:
            support_validity_dict[s] = np.array(support_validity[i])

    if cstr_fn is not None:
        return support_q_mat_dict, support_values_dict, support_validity_dict
    else:
        return support_q_mat_dict, support_values_dict
def main(A):
    # gaussian are used for each subbox.
    # slaves are processes so they won't damage these variables
    # from the master.
    global sightlines
    global deltafield
    global objectidfield
    global velfield

    sightlines = Sightlines(A)
    powerspec = PowerSpectrum(A)

    # fine1 takes some time, so we do it async
    # while initing lya and estimating box layout.
    delta0, var0, disp0 = initcoarse(A, powerspec)
    varlya = initlya(A)

    den1 = density2.Density(A.NmeshFine,
                            # Kmax=A.Kmax,
                            Kmin=A.KSplit,
                            power=powerspec,
                            BoxSize=A.BoxSize / A.Nrep)

    layout = A.layout(len(sightlines), chunksize=1024)

    Nsamples = sightlines.Nsamples.sum()
    print 'total number of pixels', Nsamples

    deltafield = sharedmem.empty(shape=Nsamples, dtype='f4')
    velfield = sharedmem.empty(shape=Nsamples, dtype='f4')
    objectidfield = sharedmem.empty(shape=Nsamples, dtype='i4')

    processors = [
        (AddDelta, delta0),
        (AddDisp(0), disp0[0]),
        (AddDisp(1), disp0[1]),
        (AddDisp(2), disp0[2]),
    ]

    for proc, d0 in processors:
        proc.prepare(A, d0)

    MemoryBytes = numpy.max([proc.MemoryBytes for proc, d0 in processors])

    np = int((sharedmem.total_memory() - 1024 ** 3) // MemoryBytes)
    np = numpy.min([sharedmem.cpu_count(), np])
    print 'spawn and work, with ', np, 'slaves', \
        'each use', MemoryBytes / 1024.**2, 'MB'

    var1list = []
    with sharedmem.Pool(np=np) as pool:
        def work(i, j, k):
            box = layout[i, j, k]
            var = None
            for cls, d0 in processors:
                proc = cls(box, den1, varlya)
                N = 0
                for chunk in box:
                    N += proc.visit(chunk)
                if cls is AddDelta:
                    var1 = proc.var1
                # free memory
                del proc
                if N == 0:
                    # no pixels
                    # No need to work on other processors
                    break
            print 'done', i, j, k, N, var1
            return var1

        def reduce(v1):
            if v1 is not None:
                var1list.append(v1)

        pool.map(work, A.yieldwork(), reduce=reduce, star=True)

    deltafield.tofile(A.DeltaField)
    velfield.tofile(A.VelField)
    objectidfield.tofile(A.ObjectIDField)

    D2 = A.cosmology.Dplus(1 / 3.0) / A.cosmology.Dplus(1.0)
    D3 = A.cosmology.Dplus(1 / 4.0) / A.cosmology.Dplus(1.0)

    var1 = numpy.nanmean(var1list)
    var = var0 + var1 + varlya
    print 'gaussian-variance is', var
    numpy.savetxt(A.datadir + '/gaussian-variance.txt', [var])
    print 'lya field', 'var', var
    print 'growth factor at z=2.0, 3.0', D2, D3
    print 'lya variance adjusted to z=2.0, z=3.0', D2 ** 2 * var, D3 ** 2 * var
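# The worker-count heuristic in main() above (leave ~1 GB of headroom, then cap by the
# CPU count) can be reused on its own. A minimal sketch with a placeholder per-worker
# memory budget (512 MB is an assumption, not a value taken from main()):
import sharedmem

per_worker_bytes = 512 * 1024 ** 2   # placeholder budget per slave
headroom = 1024 ** 3                 # keep ~1 GB free, as main() does
np_workers = int((sharedmem.total_memory() - headroom) // per_worker_bytes)
np_workers = max(1, min(sharedmem.cpu_count(), np_workers))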
def paint(pos, color, luminosity, sml, camera, CCD,
          tree=None, return_tree_and_sml=False,
          normalize=True, np=None,
          direct_write=False, cumulative=True):
    """
    pos = (x, y, z)
    color can be None or 1 or array
    luminosity can be None or 1 or array
    sml can be None or 1, or array
    camera is Camera
    CCD is array of camera.shape, 2.
        CCD[..., 0] is the color channel (sum of color * luminosity)
        CCD[..., 1] is the luminosity channel (sum of luminosity)

    if color is None, CCD.shape == camera.shape
    if color is not None, CCD.shape == camera.shape, 2
        CCD[..., 0] is color
        CCD[..., 1] is brightness

    if normalize is False, do not normalize; CCD[..., 0] will be the weighted sum of color.
    if normalize is True, CCD[..., 0] will be the weighted average of color

    if direct_write is true, each process will directly write to CCD
    (CCD must be on sharedmem)

    if cumulative is False, original content in CCD will be disregarded.
    if cumulative is True, original content in CCD will be preserved (+=)
    """
    CCDlimit = 20 * 1024 * 1024  # 20M pixel per small CCD
    camera.shape = (CCD.shape[0], CCD.shape[1])
    nCCD = int((CCD.shape[0] * CCD.shape[1] / CCDlimit) ** 0.5)
    if np is None:
        np = sharedmem.cpu_count()
    if nCCD <= np ** 0.5:
        nCCD = int(np ** 0.5 + 1)
    cams = camera.divide(nCCD, nCCD)
    cams = cams.reshape(-1, 3)

    if tree is None:
        scale = fc.scale([x.min() for x in pos], [x.ptp() for x in pos])
        zkey = sharedmem.empty(len(pos[0]), dtype=fc.fckeytype)
        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 1024
            def work(i):
                sl = slice(i, i + chunksize)
                x, y, z = pos
                fc.encode(x[sl], y[sl], z[sl], scale=scale, out=zkey[i:i + chunksize])
            pool.map(work, range(0, len(zkey), chunksize))
        arg = sharedmem.argsort(zkey)
        tree = zt.Tree(zkey=zkey, scale=scale, arg=arg, minthresh=8, maxthresh=20)

    if sml is None:
        sml = sharedmem.empty(len(zkey), 'f4')
        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 64
            def work(i):
                setupsml(tree, [x[i:i + chunksize] for x in pos],
                         out=sml[i:i + chunksize])
            pool.map(work, range(0, len(zkey), chunksize))

    def writeCCD(i, sparse, myCCD):
        cam, ox, oy = cams[i]
        #print i, sparse, len(cams)
        if sparse:
            index, C, L = myCCD
            x = index[0] + ox
            y = index[1] + oy
            p = CCD.flat
            if color is not None:
                ind = numpy.ravel_multi_index((x, y, 0), CCD.shape)
                if cumulative:
                    p[ind] += C
                else:
                    p[ind] = C
                ind = numpy.ravel_multi_index((x, y, 1), CCD.shape)
                if cumulative:
                    p[ind] += L
                else:
                    p[ind] = L
            else:
                ind = numpy.ravel_multi_index((x, y), CCD.shape)
                if cumulative:
                    p[ind] += L
                else:
                    p[ind] = L
        else:
            if color is not None:
                if cumulative:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1], :] += myCCD
                else:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1], :] = myCCD
            else:
                if cumulative:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1]] += myCCD[..., 1]
                else:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1]] = myCCD[..., 1]

    with sharedmem.MapReduce(np=np) as pool:
        def work(i):
            cam, ox, oy = cams[i]
            myCCD = numpy.zeros(cam.shape, dtype=('f8', 2))
            cam.paint(pos[0], pos[1], pos[2], sml, color, luminosity,
                      out=myCCD, tree=tree)
            mask = (myCCD[..., 1] != 0)
            if mask.sum() < 0.1 * myCCD[..., 1].size:
                index = mask.nonzero()
                C = myCCD[..., 0][mask]
                L = myCCD[..., 1][mask]
                sparse, myCCD = True, (index, C, L)
            else:
                sparse, myCCD = False, myCCD
            if not direct_write:
                return i, sparse, myCCD
            else:
                writeCCD(i, sparse, myCCD)
                return 0, 0, 0

        def reduce(i, sparse, myCCD):
            if not direct_write:
                writeCCD(i, sparse, myCCD)

        pool.map(work, range(len(cams)), reduce=reduce)

    if color is not None and normalize:
        CCD[..., 0] /= CCD[..., 1]

    if return_tree_and_sml:
        return CCD, tree, sml
    else:
        tree = None
        sml = None
        return CCD
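# When direct_write=True, the docstring above requires CCD to live in shared memory.
# A hedged allocation sketch: the resolution is a placeholder, and the trailing axis
# of 2 holds the color and luminosity channels described in the docstring.
import sharedmem

width, height = 1024, 1024                    # placeholder camera resolution
CCD = sharedmem.empty((width, height, 2), dtype='f8')
CCD[...] = 0.                                 # start from an empty exposure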