def motion_correct_parallel(file_names, fr, template=None, margins_out=0,
                            max_shift_w=5, max_shift_h=5, remove_blanks=False,
                            apply_smooth=True, backend='single_thread'):
    """Motion correct many movies, serially or on an ipyparallel cluster.

    One argument tuple is built per file and handed to
    ``process_movie_parallel``; the remaining parameters are forwarded to it
    unchanged.

    Parameters
    ----------
    file_names: list of strings
        names of the files to be motion corrected
    fr: double
        fr parameter forwarded to process_movie_parallel
    template:
        forwarded to process_movie_parallel
    margins_out: int
        number of pixels to remove from the borders
    max_shift_w, max_shift_h: int
        forwarded to process_movie_parallel
    remove_blanks, apply_smooth: bool
        forwarded to process_movie_parallel
    backend: string
        'ipyparallel' (uses all engines of the default cluster) or
        'single_thread'

    Return
    ------
    list with one result of process_movie_parallel per input file
    (documented by the original author as the base file names of the motion
    corrected files)

    Raises
    ------
    Exception
        'Unknown backend' if backend is neither of the supported values.
    """
    # Compare strings by value: `is` only works by accident of interning and
    # fails for backend names built at run time (e.g. read from a config file).
    if backend not in ('ipyparallel', 'single_thread'):
        raise Exception('Unknown backend')

    args_in = [(f, fr, margins_out, template, max_shift_w, max_shift_h,
                remove_blanks, apply_smooth) for f in file_names]

    if backend == 'single_thread':
        # A real list on both Python 2 and 3 (map() is lazy on Python 3).
        return [process_movie_parallel(args) for args in args_in]

    c = None
    dview = None
    try:
        c = Client()
        dview = c[:]
        file_res = dview.map_sync(process_movie_parallel, args_in)
    finally:
        # Release cluster-side results whether or not the map succeeded.
        if dview is not None:
            dview.results.clear()
        if c is None:
            print('could not close client')
        else:
            c.purge_results('all')
            c.purge_everything()
            c.close()
    return file_res
def extract_rois_patch(file_name, d1, d2, rf=5, stride=5):
    """Run ``nmf_patches`` on every patch of a d1 x d2 field of view.

    Patch coordinates come from ``extract_patch_coordinates``; each patch is
    processed on the ipyparallel cluster (all engines) with two components.

    Parameters
    ----------
    file_name: string
        forwarded to nmf_patches as the first element of each argument tuple
    d1, d2: int
        dimensions of the field of view
    rf: int
        forwarded to extract_patch_coordinates
    stride: int
        forwarded to extract_patch_coordinates

    Returns
    -------
    A1, A2: scipy.sparse.lil_matrix (d1*d2 x number of patches)
        first and second column of the factor returned for each patch, placed
        at the patch's flat pixel indices
    C1, C2: list
        first and second row of the second factor returned for each patch
    """
    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)
    perctl = 95
    n_components = 2
    tol = 1e-6
    max_iter = 5000

    args_in = [(file_name, id_f, id_2d[0].shape, perctl, n_components, tol,
                max_iter) for id_f, id_2d in zip(idx_flat, idx_2d)]
    st = time.time()
    print(len(idx_flat))

    # Create the client outside the try block: if it cannot be created there
    # is nothing to clean up and the original error must surface unmasked.
    c = Client()
    dview = None
    try:
        dview = c[:]
        file_res = dview.map_sync(nmf_patches, args_in)
    finally:
        if dview is not None:
            dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    print(time.time() - st)

    n_patches = len(file_res)
    A1 = lil_matrix((d1 * d2, n_patches))
    C1 = []
    A2 = lil_matrix((d1 * d2, n_patches))
    C2 = []
    for count, (idx_, flt, ca, _) in enumerate(file_res):
        A1[idx_, count] = flt[:, 0][:, np.newaxis]
        A2[idx_, count] = flt[:, 1][:, np.newaxis]
        C1.append(ca[0, :])
        C2.append(ca[1, :])

    return A1, A2, C1, C2
def extract_rois_patch(file_name, d1, d2, rf=5, stride=5):
    """Run ``nmf_patches`` on every patch of a d1 x d2 field of view.

    Patch coordinates come from ``extract_patch_coordinates``; each patch is
    processed on the ipyparallel cluster (all engines) with two components.

    Parameters
    ----------
    file_name: string
        forwarded to nmf_patches as the first element of each argument tuple
    d1, d2: int
        dimensions of the field of view
    rf: int
        forwarded to extract_patch_coordinates
    stride: int
        forwarded to extract_patch_coordinates

    Returns
    -------
    A1, A2: scipy.sparse.lil_matrix (d1*d2 x number of patches)
        first and second column of the factor returned for each patch, placed
        at the patch's flat pixel indices
    C1, C2: list
        first and second row of the second factor returned for each patch
    """
    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)
    perctl = 95
    n_components = 2
    tol = 1e-6
    max_iter = 5000

    args_in = [(file_name, id_f, id_2d[0].shape, perctl, n_components, tol,
                max_iter) for id_f, id_2d in zip(idx_flat, idx_2d)]
    st = time.time()
    print(len(idx_flat))

    # Create the client outside the try block: if it cannot be created there
    # is nothing to clean up and the original error must surface unmasked.
    c = Client()
    dview = None
    try:
        dview = c[:]
        file_res = dview.map_sync(nmf_patches, args_in)
    finally:
        if dview is not None:
            dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    print(time.time() - st)

    n_patches = len(file_res)
    A1 = lil_matrix((d1 * d2, n_patches))
    C1 = []
    A2 = lil_matrix((d1 * d2, n_patches))
    C2 = []
    for count, (idx_, flt, ca, _) in enumerate(file_res):
        A1[idx_, count] = flt[:, 0][:, np.newaxis]
        A2[idx_, count] = flt[:, 1][:, np.newaxis]
        C1.append(ca[0, :])
        C2.append(ca[1, :])

    return A1, A2, C1, C2
def run_CNMF_patches(file_name, shape, options, rf=16, stride=4, n_processes=2,
                     backend='single_thread', memory_fact=1):
    """Run CNMF on patches, in parallel or sequentially, and merge the results.

    The 'ipyparallel' backend requires that an ipyparallel cluster is running.

    Parameters
    ----------
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie
    shape: tuple of three elements
        dimensions of the original movie across y, x, and time
    options: dictionary
        containing all the parameters for the various algorithms. It is
        modified in place: the backend of the preprocess/spatial/temporal
        parameter groups is forced to 'single_thread' and their
        n_pixels_per_process is set from rf and memory_fact.
    rf: int
        half-size of the square patch in pixel
    stride: int
        amount of overlap between patches
    backend: string
        'ipyparallel' or 'single_thread'
    n_processes: int
        number of cores to be used (should be less than the number of cores
        started with ipyparallel)
    memory_fact: double
        unitless number accounting how much memory should be used. It
        represents the fraction of patch processed in a single thread. You
        will need to try different values to see which one would work

    Returns
    -------
    A_tot: sparse matrix
        all the components from all the patches, rescaled by the inverse of
        the number of patches covering each pixel
    C_tot: np.ndarray
        calcium traces corresponding to A_tot
    b: background spatial estimate obtained by alternating updates on the
        per-patch backgrounds
    f: background temporal estimate obtained by the same alternating updates
    sn_tot: np.ndarray
        per pixel noise estimate
    optional_outputs: dict
        per-patch outputs of the CNMF algorithm plus the stacked background
        matrices ('B', 'F') and the pixel coverage count ('mask')

    Raises
    ------
    Exception
        'Backend unknown' if backend is not a supported value (checked before
        anything else so that options is left untouched).
    """
    # Compare by value; `backend is '...'` only works for interned literals.
    if backend not in ('ipyparallel', 'single_thread'):
        raise Exception('Backend unknown')

    (d1, d2, T) = shape
    d = d1 * d2
    K = options['init_params']['K']

    # Patches are processed serially inside each worker.
    n_pixels_per_process = int((rf * rf) / memory_fact)
    for group in ('preprocess_params', 'spatial_params', 'temporal_params'):
        options[group]['backend'] = 'single_thread'
        options[group]['n_pixels_per_process'] = n_pixels_per_process

    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)

    args_in = [(file_name, id_f, id_2d[0].shape, options)
               for id_f, id_2d in zip(idx_flat, idx_2d)]

    print(len(idx_flat))

    st = time.time()

    if backend == 'ipyparallel':
        c = None
        try:
            c = Client()
            dview = c[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()
        except Exception:
            print('Something went wrong')
            raise
        finally:
            # Close the connection on failure too, not only on success.
            if c is not None:
                c.close()
            print('You may think that it went well but reality is harsh')
    else:
        # A real list so that len() below works on Python 3 as well.
        file_res = [cnmf_patches(args) for args in args_in]

    print(time.time() - st)

    # extract the values from the output of mapped computation
    num_patches = len(file_res)

    A_tot = scipy.sparse.csc_matrix((d, K * num_patches))
    B_tot = scipy.sparse.csc_matrix((d, num_patches))
    C_tot = np.zeros((K * num_patches, T))
    F_tot = np.zeros((num_patches, T))
    mask = np.zeros(d)
    sn_tot = np.zeros((d1 * d2))
    b_tot = []
    f_tot = []
    bl_tot = []
    c1_tot = []
    neurons_sn_tot = []
    g_tot = []
    idx_tot = []
    shapes_tot = []
    id_patch_tot = []

    count = 0
    print('Transforming patches into full matrix')

    for patch_id, patch_res in enumerate(file_res):
        idx_, shapes, A, b, C, f, S, bl, c1, neurons_sn, g, sn, _ = patch_res
        sn_tot[idx_] = sn
        b_tot.append(b)
        f_tot.append(f)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)
        mask[idx_] += 1
        F_tot[patch_id, :] = f
        B_tot[idx_, patch_id] = b

        n_comp = np.shape(A)[-1]
        # Convert once per patch instead of twice per component.
        A_csc = A.tocsc() if n_comp else None
        for ii in range(n_comp):
            col = A_csc[:, ii]
            # normalize each component to unit l2 norm
            new_comp = col / np.sqrt(np.sum(np.array(col.todense())**2))
            if new_comp.sum() > 0:
                A_tot[idx_, count] = new_comp
                C_tot[count, :] = C[ii, :]
                id_patch_tot.append(patch_id)
                count += 1

    A_tot = A_tot[:, :count]
    C_tot = C_tot[:count, :]

    optional_outputs = dict()
    optional_outputs['b_tot'] = b_tot
    optional_outputs['f_tot'] = f_tot
    optional_outputs['bl_tot'] = bl_tot
    optional_outputs['c1_tot'] = c1_tot
    optional_outputs['neurons_sn_tot'] = neurons_sn_tot
    optional_outputs['g_tot'] = g_tot
    optional_outputs['idx_tot'] = idx_tot
    optional_outputs['shapes_tot'] = shapes_tot
    optional_outputs['id_patch_tot'] = id_patch_tot
    optional_outputs['B'] = B_tot
    optional_outputs['F'] = F_tot
    optional_outputs['mask'] = mask

    # Diagonal rescaling by 1 / (number of patches covering each pixel).
    # NOTE(review): pixels covered by no patch have mask == 0 and yield inf
    # here - confirm that extract_patch_coordinates always covers every pixel.
    Im = scipy.sparse.csr_matrix((1. / mask, (np.arange(d), np.arange(d))))
    Bm = Im.dot(B_tot)
    A_tot = Im.dot(A_tot)

    # Ten alternating updates of the global background factors b and f.
    f = np.mean(F_tot, axis=0)
    for _ in range(10):
        b = Bm.dot(F_tot.dot(f)) / np.sum(f**2)
        f = np.dot((Bm.T.dot(b)).T, F_tot) / np.sum(b**2)

    return A_tot, C_tot, b, f, sn_tot, optional_outputs
def update_temporal_components_parallel(Y, A, b, Cin, fin, bl=None, c1=None,
                                        g=None, sn=None, ITER=2,
                                        method_foopsi='constrained_foopsi',
                                        n_processes=1, backend='single_thread',
                                        memory_efficient=False, **kwargs):
    """Update temporal components and background given spatial components using a block coordinate descent approach.

    Parameters
    -----------
    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix
        matrix of spatial components (d x K)
    b: ndarray (dx1)
        current estimate of background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    bl: np.ndarray
        baseline for fluorescence trace for each column in A
    c1: np.ndarray
        initial concentration for each column in A
    g: np.ndarray
        discrete time constant for each column in A
    sn: np.ndarray
        noise level for each column in A
    ITER: positive integer
        Maximum number of block coordinate descent loops.
    method_foopsi: string
        Method of deconvolution of neural activity. constrained_foopsi is the
        only method supported at the moment (the argument is not read here).
    n_processes: int
        number of processes to use for parallel computation. Should be less
        than the number of processes started with ipcluster.
    backend: 'str'
        single_thread no parallelization
        ipyparallel, parallelization using the ipyparallel cluster. You should
        start the cluster (install ipyparallel and then type
        ipcluster -n 6, where 6 is the number of processes).
    memory_efficient: Bool
        not read by this function
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl,c1,g,sn (see
        documentation). ``p`` (int, order of the autoregression model) is
        mandatory; ``method`` optionally selects the solver.

    Returns
    --------
    C: np.ndarray
        matrix of temporal components (K x T)
    f: np.ndarray
        temporal background (last row(s) of the stacked component matrix)
    Y_res: matrix with current residual (d x T)
    S: np.ndarray
        matrix of deconvolved activity (spikes) (K x T)
    bl, c1, sn, g: per-component values reported by constrained_foopsi
        (the input arrays are updated in place when provided)

    Raises
    ------
    Exception
        if ``p`` is missing or None, if the cluster has fewer engines than
        n_processes, or if backend is not supported.
    """
    # dict.has_key() does not exist on Python 3; .get() covers both the
    # missing and the explicit-None case.
    if kwargs.get('p') is None:
        raise Exception("You have to provide a value for p")

    _, T = np.shape(Y)
    nr = np.shape(A)[-1]

    if bl is None:
        bl = np.repeat(None, nr)
    if c1 is None:
        c1 = np.repeat(None, nr)
    if g is None:
        g = np.repeat(None, nr)
    if sn is None:
        sn = np.repeat(None, nr)

    # Append the background as an extra component (column nr of A, row nr of C).
    A = scipy.sparse.hstack((A, coo_matrix(b)))
    S = np.zeros(np.shape(Cin))
    Cin = np.vstack((Cin, fin))
    C = Cin
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0)))

    Cin = coo_matrix(Cin)
    YrA = (A.T.dot(Y)).T - Cin.T.dot(A.T.dot(A))

    c = None
    dview = None
    if backend == 'ipyparallel':
        try:  # if server is not running and raise exception if not installed or not started
            from ipyparallel import Client
            c = Client()
        except Exception:
            print("this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes")
            raise

    try:
        if c is not None:
            if len(c) < n_processes:
                print(len(c))
                raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value")
            dview = c[:n_processes]  # use the number of processes

        Cin = np.array(Cin.todense())
        for _ in range(ITER):
            # Groups of components that can be updated simultaneously.
            O, lo = update_order(A.tocsc()[:, :nr])
            for count, jo_ in enumerate(O):
                jo = np.array(list(jo_))
                Ytemp = YrA[:, jo.flatten()] + (np.dot(np.diag(nA[jo]), Cin[jo, :])).T
                Ctemp = np.zeros((np.size(jo), T))
                Stemp = np.zeros((np.size(jo), T))
                gtemp = np.zeros((np.size(jo), kwargs['p']))
                nT = nA[jo]

                args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj, None, None, None, None, kwargs)
                           for jj in range(len(jo))]

                if backend == 'ipyparallel':
                    results = dview.map_sync(constrained_foopsi_parallel, args_in)
                elif backend == 'single_thread':
                    results = [constrained_foopsi_parallel(args) for args in args_in]
                else:
                    raise Exception('Backend not defined. Use either single_thread or ipyparallel')

                for chunk in results:
                    C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk
                    Ctemp[jj_, :] = C_[None, :]
                    Stemp[jj_, :] = Sp_
                    Ytemp[:, jj_] = Ytemp_[:, None]
                    gtemp[jj_, :] = gn_.T

                    bl[jo[jj_]] = cb_
                    c1[jo[jj_]] = c1_
                    sn[jo[jj_]] = sn_
                    # Use this result's own row (jj_). The previous code
                    # indexed with `jj`, the variable leaked from the list
                    # comprehension above, i.e. always the last row of the
                    # group on Python 2 and a NameError on Python 3.
                    g[jo[jj_]] = gtemp[jj_, :]

                YrA[:, jo] = Ytemp
                C[jo, :] = Ctemp
                S[jo, :] = Stemp

                print(str(np.sum(lo[:count])) + ' out of total ' + str(nr) + ' temporal components updated \n')

            # Non-negative update of the background component.
            ii = nr
            YrA[:, ii] = YrA[:, ii] + nA[ii] * np.atleast_2d(Cin[ii, :]).T
            cc = np.maximum(YrA[:, ii] / nA[ii], 0)
            C[ii, :] = cc[:].T
            YrA[:, ii] = YrA[:, ii] - nA[ii] * np.atleast_2d(C[ii, :]).T

            if backend == 'ipyparallel':
                dview.results.clear()
                c.purge_results('all')
                c.purge_everything()

            if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm(C, 'fro') <= 1e-3:
                # stop if the overall temporal component does not change by much
                print("stopping: overall temporal component not changing significantly")
                break
            else:
                # NOTE(review): this rebinds Cin to the same array as C, so
                # from the second sweep on `Cin - C` is identically zero and
                # the test above always stops the loop - confirm whether a
                # copy was intended.
                Cin = C

        Y_res = Y - A * C  # this includes the baseline term

        f = C[nr:, :]
        C = C[:nr, :]
    finally:
        # Do not leak the cluster connection when anything above raises.
        if c is not None:
            c.close()

    return C, f, Y_res, S, bl, c1, sn, g
def specific_index(matrix, shape, indexes_file, indexes_name, method=1):
    """Compute determinants and entropies of covariance sub-matrices on a cluster.

    The index sets are read from variable ``indexes_name`` of the MATLAB file
    ``indexes_file``, scattered over all engines of the 'brincolab-cluster'
    ipyparallel profile, and each engine evaluates
    ``det(covmat[idx][:, idx])`` and the matching entropy for its share.

    Parameters
    ----------
    matrix: array-like
        covariance matrix values; reshaped to (shape, shape)
    shape: int (or int-convertible)
        side length of the square covariance matrix
    indexes_file: string
        path of the .mat file loaded with scipy.io.loadmat
    indexes_name: string
        name of the variable inside the .mat file holding the index sets
    method: int
        entropy formula run on the engines:
        1 -> 0.5*log((2*pi*e)**x * y)
        2 -> 0.5*log(abs((2*pi*e)**x * y))
        3 -> entropy() returns None

    Returns
    -------
    (dets, ents, idxs): determinants and entropies with NaN entries removed
    (each filtered independently) and the list of index sets as gathered from
    the engines.

    Raises
    ------
    ValueError
        if method is not 1, 2 or 3 (checked before touching the file or the
        cluster).
    """
    entropy_sources = {
        1: """def entropy(x,y):
    return 0.5*numpy.log((2*numpy.pi * numpy.exp(1))**(x)*y)
""",
        2: """def entropy(x,y):
    return 0.5*numpy.log(numpy.abs((2*numpy.pi * numpy.exp(1))**(x)*y))
""",
        3: """def entropy(x,y):
    return #0.5*numpy.log(numpy.abs((2*numpy.pi * numpy.exp(1))**(x)*y))
""",
    }
    # Previously an unsupported value left entropy_method unbound and only
    # failed (with UnboundLocalError) after the cluster had been contacted.
    if method not in entropy_sources:
        raise ValueError('method must be 1, 2 or 3, got {0!r}'.format(method))
    entropy_method = entropy_sources[method]

    import scipy.io as sio
    workspace = sio.loadmat(indexes_file)

    shape = int(shape)
    covmat = np.reshape(np.array(matrix), (shape, shape))

    # Source of the function executed on every engine.
    # NOTE(review): it reads `numpy` and `k` from the engine namespace, but
    # this function neither imports numpy on the engines nor pushes `k` -
    # presumably they are left over from an earlier call; verify.
    # NOTE(review): `ind[0][0] - 1` looks like a conversion from 1-based
    # MATLAB indices - confirm against the .mat file layout.
    det_method = """def det():
    N = len(indexes_list)
    dets = numpy.zeros(N,dtype=numpy.float32)
    ent = numpy.zeros(N,dtype=numpy.float32)
    dets[:] = 0 #numpy.nan
    ent[:] = 0 #numpy.nan
    if len(indexes_list) > 0:
        idx_mat = indexes_list
    else:
        return {'dets':[],'ents':[],'idx':[]}
    matrices = []
    idxs = []
    i=0
    for ind in idx_mat:
        idx = ind[0][0] - 1
        matrix = covmat[idx][:,idx]
        dets[i] = numpy.linalg.det(matrix)
        ent[i] = entropy(k,dets[i])
        matrices.append(matrix)
        idxs.append(ind)
        i += 1
    return {'dets':dets,'ents':ent,'idx':idxs}"""

    from ipyparallel import Client
    rc = Client(profile='brincolab-cluster')
    rc.purge_everything()  # clear the session
    dview = rc[:]  # use all engines
    print(len(dview))

    dview['covmat'] = covmat  # distribute the covariance matrix
    dview.execute(det_method)
    dview.execute(entropy_method)

    indexes_list = workspace[indexes_name]
    dview.scatter('indexes_list', indexes_list)

    dview.execute('res = det()', block=True)
    responses = dview.gather('res', block=True)

    dets = np.array([], dtype=np.float32)
    ents = np.array([], dtype=np.float32)
    idxs = []
    for el in responses:
        dets = np.append(dets, el['dets'])
        ents = np.append(ents, el['ents'])
        for subel in el['idx']:
            idxs.append(subel)

    dets = dets[~np.isnan(dets)]
    ents = ents[~np.isnan(ents)]
    print(len(dets), len(ents), len(idxs))
    print()

    rc.purge_everything()  # clear the session
    return (dets, ents, idxs)
def get_all_determinants(matrix, shape, method=1):
    """Compute determinants and entropies of every principal sub-matrix on a cluster.

    For every subset size i in 2..N-1 (N = side of the covariance matrix) all
    index combinations are split into chunks, scattered over the engines of
    the 'brincolab-cluster' ipyparallel profile, and each engine evaluates
    ``det(covmat[idx][:, idx])`` and the matching entropy for its chunk.

    Parameters
    ----------
    matrix: array-like
        covariance matrix values; reshaped to (shape, shape)
    shape: int (or int-convertible)
        side length of the square covariance matrix
    method: int
        entropy formula run on the engines:
        1 -> 0.5*log((2*pi*e)**x * y)
        2 -> 0.5*log(abs((2*pi*e)**x * y))

    Returns
    -------
    (dets, ents, idxs): determinants and entropies with the -1 placeholders
    and NaN entries removed (each array filtered independently) and the list
    of index tuples gathered from the engines.

    Raises
    ------
    ValueError
        if method is not 1 or 2 (checked before contacting the cluster).
    """
    entropy_sources = {
        1: """def entropy(x,y):
    return 0.5*numpy.log((2*numpy.pi * numpy.exp(1))**(x)*y)
""",
        2: """def entropy(x,y):
    return 0.5*numpy.log(numpy.abs((2*numpy.pi * numpy.exp(1))**(x)*y))
""",
    }
    # Previously an unsupported value left entropy_method unbound and only
    # failed (with UnboundLocalError) after the cluster had been contacted.
    if method not in entropy_sources:
        raise ValueError('method must be 1 or 2, got {0!r}'.format(method))
    entropy_method = entropy_sources[method]

    # Source of the function executed on every engine; slots that are not
    # filled keep the -1 placeholder and are dropped after gathering.
    det_method = """def det(N):
    dets = numpy.zeros(N,dtype=numpy.float32)
    ent = numpy.zeros(N,dtype=numpy.float32)
    dets[:] = -1 #numpy.nan
    ent[:] = -1 #numpy.nan
    if len(index_mat) > 0:
        idx_mat = index_mat[0]
    else:
        return {'dets':[],'ents':[],'idx':[]}
    matrices = []
    idxs = []
    i=0
    for ind in idx_mat:
        idx = list(ind)
        matrix = covmat[idx][:,idx]
        dets[i] = numpy.linalg.det(matrix)
        ent[i] = entropy(k,dets[i])
        matrices.append(matrix)
        idxs.append(ind)
        i += 1
    return {'dets':dets,'ents':ent,'idx':idxs}"""

    # The covariance matrix is expected to be computed already.
    shape = int(shape)
    covmat = np.reshape(np.array(matrix), (shape, shape))
    N = covmat.shape[0]
    k = N - 1
    arr = list(range(0, N))  # indices used to build the square sub-matrix masks

    from ipyparallel import Client
    rc = Client(profile='brincolab-cluster')
    rc.purge_everything()  # clear the session
    dview = rc[:]  # use all engines
    print(len(dview))

    dview['covmat'] = covmat  # distribute the covariance matrix
    dview['k'] = k  # N-1
    dview.execute(det_method)
    dview.execute(entropy_method)

    chunk_sizes = {}
    indices_scatter = {}
    n_engines = len(rc.ids)
    for i in range(2, N):
        iters = itertools.combinations(arr, i)
        n = int(factorial(N) / factorial(i) / factorial(N - i))
        # Integer ceiling division: always an int (range() and det(N) need
        # one) and not affected by floor division happening before ceil().
        n_per_process = -(-n // n_engines)
        print(i, n, n_per_process)
        chunk_sizes[i] = n_per_process
        # NOTE(review): every islice shares the same `iters` object and uses
        # an absolute start offset; this presumably relies on each slice being
        # serialized with its own copy of the iterator state when scattered -
        # confirm before changing.
        indices_scatter[i] = [islice(iters, j, j + n_per_process)
                              for j in range(0, n, n_per_process)]

    with dview.sync_imports():
        import numpy

    responses = {}
    for i in range(2, N):
        n_scatters = chunk_sizes[i]
        print(n_scatters)
        dview.scatter('index_mat', indices_scatter[i], block=True)
        dview.execute('res = det({})'.format(n_scatters), block=True)
        responses[i] = dview.gather('res', block=True)

    dets = np.array([], dtype=np.float32)
    ents = np.array([], dtype=np.float32)
    idxs = []
    for i in tqdm(range(2, N)):
        for el in responses[i]:
            if -1 in el['dets']:
                # drop the unfilled placeholder slots
                mask = np.array(el['dets']) == -1
                dets = np.append(dets, el['dets'][~mask])
                ents = np.append(ents, el['ents'][~mask])
            else:
                dets = np.append(dets, el['dets'])
                ents = np.append(ents, el['ents'])
            for subel in el['idx']:
                idxs.append(subel)

    dets = dets[~numpy.isnan(dets)]
    ents = ents[~numpy.isnan(ents)]
    print(len(dets), len(ents), len(idxs))
    mask = dets == -1
    print(len(dets[mask]))

    rc.purge_everything()  # clear the session
    return (dets, ents, idxs)
# One seed per experiment, drawn from a fixed-seed generator so that the
# whole simulation is reproducible.
rng = RandomState(0)
seeds = rng.random_integers(0, 2**31 - 2, size=EX_NUM)

for tr in trends:
    filename = "".join(("adf_z_", tr, ".npz"))
    # NaN-filled so that experiments not yet run are recognisable in the
    # intermediate checkpoints.
    results = zeros((len(percentiles), len(T), EX_NUM)) * nan

    for i in range(EX_NUM):
        print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
        # Non parallel version
        # args = (T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
        # out = [wrapper(a, b, c, d) for a, b, c, d in args]
        now = datetime.datetime.now()
        out = lview.map_sync(wrapper,
                             T,
                             [tr] * m,
                             [EX_SIZE] * m,
                             [seeds[i]] * m)
        # Prevent unnecessary results from accumulating
        lview.purge_results("all")
        rc.purge_everything()
        print(datetime.datetime.now() - now)

        quantiles = [percentile(x, percentiles) for x in out]
        results[:, :, i] = array(quantiles).T

        # Checkpoint every 50 experiments (including the very first one).
        if not i % 50:
            savez(filename, trend=tr, results=results,
                  percentiles=percentiles, T=T)

    # Final save once all experiments for this trend are done.
    savez(filename, trend=tr, results=results,
          percentiles=percentiles, T=T)
def update_spatial_components_parallel(Y, C, f, A_in, sn=None, d1=None, d2=None,
                                       min_size=3, max_size=8, dist=3,
                                       method='ellipse', expandCore=None,
                                       backend='single_thread', n_processes=4,
                                       n_pixels_per_process=128,
                                       memory_efficient=False):
    """update spatial footprints and background through Basis Pursuit Denoising

    for each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components

    Parameters
    ----------
    Y: np.ndarray (2D)
        movie, raw data in 2D (pixels x time).
    C: np.ndarray
        calcium activity of each neuron.
    f: np.ndarray
        temporal profile of background activity.
    A_in: np.ndarray
        current spatial components (pixels x neurons), used to determine the
        search location of every component.
    sn: np.ndarray
        noise associated with each pixel; it is indexed per pixel by the
        single_thread backend, so it must be provided there.
    d1: int
        x movie dimension (required)
    d2: int
        y movie dimension (required)
    min_size: [optional] int
    max_size: [optional] int
    dist: [optional] int
        forwarded to determine_search_location
    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'
    expandCore: [optional] scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion
    backend: [optional] str
        'multiprocessing', 'threading', 'ipyparallel', 'single_thread'
        single_thread: no parallelization. It should be used in most cases.
        multiprocessing or threading: use the corresponding python threading
        package. It has known issues on mac OS. Not to be used in most
        situations.
        ipyparallel: sends jobs to the engines of a running ipython cluster
    n_processes: [optional] int
        number of threads to use when the backend is multiprocessing,
        threading, or ipyparallel
    n_pixels_per_process: [optional] int
        number of pixels to be processed by each thread
    memory_efficient: bool
        whether or not to reduce memory usage (at the expense of increased
        computational time)

    Returns
    --------
    A: scipy.sparse.coo_matrix
        new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray
        temporal components (updated only when spatial components are
        completely removed)

    Raises
    ------
    Exception
        on missing dimensions, inputs with the wrong orientation, a pixel
        group larger than the image, an unknown backend, too few cluster
        engines, or failure to delete the temporary folder.
    """
    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')

    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1), 2).astype(int)

    Y = np.atleast_2d(Y)
    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.'
        )

    nr, _ = np.shape(C)  # number of neurons

    IND = determine_search_location(A_in, d1, d2, method=method,
                                    min_size=min_size, max_size=max_size,
                                    dist=dist, expandCore=expandCore)
    print(" find search location")

    # For every pixel: indices of the candidate components plus the
    # background row(s); empty list when no component reaches the pixel.
    ind2_ = [
        np.hstack((np.where(iid_)[0], nr + np.arange(f.shape[0])))
        if np.size(np.where(iid_)[0]) > 0 else [] for iid_ in IND
    ]

    folder = tempfile.mkdtemp()
    try:
        if backend == 'multiprocessing' or backend == 'threading':

            A_name = os.path.join(folder, 'A_temp')

            # Pre-allocate a writeable shared memory map as a container for the
            # results of the parallel computation
            print("Create Matrix for dumping data from matrix A and C for parallel computation....")
            A_ = np.memmap(A_name, dtype=A_in.dtype,
                           shape=(d, nr + np.size(f, 0)), mode='w+')

            pixels_name = os.path.join(folder, 'pixels')

            C_name = os.path.join(folder, 'C_temp')

            # Dump the input data to disk to free the memory
            dump(Y, pixels_name)
            dump(Cf, C_name)

            # use memory mapped versions of C and Y
            Y = load(pixels_name, mmap_mode='r')
            Cf = load(C_name, mmap_mode='r')

            pixel_groups = [
                range(i, i + n_pixels_per_process)
                for i in range(0, Y.shape[0] - n_pixels_per_process + 1,
                               n_pixels_per_process)
            ]

            # Fork the worker processes to perform computation concurrently
            print("start parallel pool...")
            sys.stdout.flush()
            Parallel(n_jobs=n_processes, backend=backend, verbose=100,
                     max_nbytes=None)(delayed(lars_regression_noise_parallel)(
                         Y, Cf, A_, sn, i, ind2_) for i in pixel_groups)

            # if n_pixels_per_process is not a multiple of Y.shape[0] run on remaining pixels
            pixels_remaining = Y.shape[0] % n_pixels_per_process
            if pixels_remaining > 0:
                print("Running deconvolution for remaining pixels:" + str(
                    pixels_remaining))
                lars_regression_noise_parallel(
                    Y, Cf, A_, sn,
                    range(Y.shape[0] - pixels_remaining, Y.shape[0]),
                    ind2_, positive=1)
            A_ = np.array(A_)

        elif backend == 'ipyparallel':  # use the ipyparallel package, you need to start a cluster server (ipcluster command) in order to use it

            C_name = os.path.join(folder, 'C_temp.npy')
            np.save(C_name, Cf)

            if isinstance(Y, np.memmap):  # if input file is already memory mapped then find the filename
                Y_name = Y.filename
            else:  # if not create a memory mapped version (necessary for parallelization)
                Y_name = os.path.join(folder, 'Y_temp.npy')
                np.save(Y_name, Y)
                Y = np.load(Y_name, mmap_mode='r')

            # create arguments to be passed to the function. Here we are grouping bunch of pixels to be processed by each thread
            n_pix = d1 * d2
            pixel_groups = [(Y_name, C_name, sn, ind2_,
                             range(i, i + n_pixels_per_process))
                            for i in range(0, n_pix - n_pixels_per_process + 1,
                                           n_pixels_per_process)]
            # The full-size groups above leave the last
            # n_pix % n_pixels_per_process pixels untouched; give them their
            # own group, as the joblib branch does.
            pixels_remaining = n_pix % n_pixels_per_process
            if pixels_remaining > 0:
                pixel_groups.append((Y_name, C_name, sn, ind2_,
                                     range(n_pix - pixels_remaining, n_pix)))

            A_ = np.zeros((d, nr + np.size(f, 0)))
            try:  # if server is not running and raise exception if not installed or not started
                from ipyparallel import Client
                c = Client()
            except Exception:
                print("this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes")
                raise

            try:
                if len(c) < n_processes:
                    print(len(c))
                    raise Exception(
                        "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
                    )

                dview = c[:n_processes]  # use the number of processes
                parallel_result = dview.map_sync(
                    lars_regression_noise_ipyparallel, pixel_groups)
                for chunk in parallel_result:
                    for pars in chunk:
                        px, idxs_, a = pars
                        A_[px, idxs_] = a
                # clean up
                dview.results.clear()
                c.purge_results('all')
                c.purge_everything()
            finally:
                # close the connection on failure as well
                c.close()

        elif backend == 'single_thread':

            Cf_ = [Cf[idx_, :] for idx_ in ind2_]

            # LARS regression
            A_ = np.hstack((np.zeros((d, nr)), np.zeros((d, np.size(f, 0)))))

            for c, y, s, id2_, px in zip(Cf_, Y, sn, ind2_, range(d)):
                if px % 1000 == 0:
                    print(px)
                if np.size(c) > 0:
                    _, _, a, _, _ = lars_regression_noise(
                        y, np.array(c.T), 1, sn[px]**2 * T)
                    if np.isscalar(a):
                        A_[px, id2_] = a
                    else:
                        A_[px, id2_] = a.T

        else:
            raise Exception(
                'Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel'
            )

        print('Updated Spatial Components')
        A_ = threshold_components(A_, d1, d2)
        print("threshold")
        ff = np.where(np.sum(A_, axis=0) == 0)  # remove empty components
        if np.size(ff) > 0:
            ff = ff[0]
            warn('eliminating empty components!!')
            nr = nr - len(ff)
            A_ = np.delete(A_, list(ff), 1)
            C = np.delete(C, list(ff), 0)

        A_ = A_[:, :nr]
        A_ = coo_matrix(A_)

        if memory_efficient:
            print("Using memory efficient computation (slow but memory preserving)")
            A__ = coo_matrix(A_, dtype=np.float32)
            C__ = coo_matrix(C[:nr, :], dtype=np.float32)
            Y_res_name = os.path.join(folder, 'Y_res_temp.npy')
            Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='w+',
                              shape=Y.shape)
            Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='r+',
                              shape=Y.shape)
            print("computing residuals")
            Y_res[:] = -A__.dot(C__).todense()[:]
            Y_res[:] += Y
        else:
            print("Using memory trade-off computation (good use of memory if input is memmaped)")
            Y_res = Y - A_.dot(coo_matrix(C[:nr, :]))

        print("Computing A_bas")
        A_bas = np.fmax(np.dot(Y_res, f.T) / scipy.linalg.norm(f)**2,
                        0)  # update baseline based on residual
        # NOTE(review): the residual is overwritten right after use; it is not
        # read again in this function - purpose unclear, kept as is.
        Y_res[:] = 1
        b = A_bas
        print("--- %s seconds ---" % (time.time() - start_time))
    except Exception:
        # Best-effort removal so that a failed run does not leave the
        # (potentially very large) temporary files behind.
        shutil.rmtree(folder, ignore_errors=True)
        raise

    try:  # clean up
        # remove temporary file created
        print("Remove temporary file created")
        shutil.rmtree(folder)
    except Exception:
        raise Exception("Failed to delete: " + folder)

    return A_, b, C
def update_temporal_components(Y, A, b, Cin, fin, bl=None, c1=None, g=None,
                               sn=None, ITER=2,
                               method_foopsi='constrained_foopsi',
                               n_processes=1, backend='single_thread',
                               memory_efficient=False, debug=False, **kwargs):
    """Update temporal components and background given spatial components using a block coordinate descent approach.

    Parameters
    -----------
    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix
        matrix of spatial components (d x K)
    b: ndarray (dx1)
        current estimate of background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    bl: np.ndarray
        baseline for fluorescence trace for each column in A
    c1: np.ndarray
        initial concentration for each column in A
    g: np.ndarray
        discrete time constant for each column in A
    sn: np.ndarray
        noise level for each column in A
    ITER: positive integer
        Maximum number of block coordinate descent loops.
    method_foopsi: string
        Method of deconvolution of neural activity. constrained_foopsi is the
        only method supported at the moment (the argument is not read here).
    n_processes: int
        number of processes to use for parallel computation. Should be less
        than the number of processes started with ipcluster.
    backend: 'str'
        single_thread no parallelization
        ipyparallel, parallelization using the ipyparallel cluster. You should
        start the cluster (install ipyparallel and then type
        ipcluster -n 6, where 6 is the number of processes).
    memory_efficient: Bool
        not read by this function
    debug: bool
        with the ipyparallel backend, run the map asynchronously and echo the
        engines' stdout and stderr
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl,c1,g,sn (see
        documentation). Some useful parameters are
        p: int
            order of the autoregression model (mandatory)
        method: [optional] string
            solution method for constrained foopsi. Choices are
                'cvx':      using cvxopt and picos (slow especially without the MOSEK solver)
                'cvxpy':    using cvxopt and cvxpy with the ECOS solver (faster, default)
                'spgl1':    using the spgl1 package
                'debug':    using spgl1 without spike non-negativity constraints (just for debugging purposes)
        solvers: list string
            primary and secondary (if problem unfeasible for approx solution)
            solvers to be used with cvxpy, default is ['ECOS','SCS']

    Note
    --------
    The temporal components are updated in parallel by default by forming of
    sequence of vertex covers.

    Returns
    --------
    C: np.ndarray
        matrix of temporal components (K x T)
    f: np.ndarray
        temporal background (last row(s) of the stacked component matrix)
    S: np.ndarray
        matrix of deconvolved activity (spikes) (K x T)
    bl, c1, sn, g: per-component values reported by constrained_foopsi
        (the input arrays are updated in place when provided)
    YrA: np.ndarray
        matrix of spatial component filtered raw data, after all contributions
        have been removed. YrA corresponds to the residual trace for each
        component and is used for faster plotting (K x T)

    Raises
    ------
    Exception
        if ``p`` is missing or None, if the cluster has fewer engines than
        n_processes, or if backend is not supported.
    """
    # dict.has_key() does not exist on Python 3; .get() covers both the
    # missing and the explicit-None case.
    if kwargs.get('p') is None:
        raise Exception("You have to provide a value for p")

    _, T = np.shape(Y)
    nr = np.shape(A)[-1]

    if bl is None:
        bl = np.repeat(None, nr)
    if c1 is None:
        c1 = np.repeat(None, nr)
    if g is None:
        g = np.repeat(None, nr)
    if sn is None:
        sn = np.repeat(None, nr)

    # Append the background as an extra component (column nr of A, row nr of C).
    A = scipy.sparse.hstack((A, coo_matrix(b)))
    S = np.zeros(np.shape(Cin))
    Cin = np.vstack((Cin, fin))
    C = Cin
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0)))

    Cin = coo_matrix(Cin)
    # Projections normalized by the squared norm of each spatial component.
    YA = (A.T.dot(Y).T) * spdiags(1. / nA, 0, nr + 1, nr + 1)
    AA = ((A.T.dot(A)) * spdiags(1. / nA, 0, nr + 1, nr + 1)).tocsr()
    YrA = YA - Cin.T.dot(AA)

    c = None
    dview = None
    if backend == 'ipyparallel':
        try:  # if server is not running and raise exception if not installed or not started
            from ipyparallel import Client
            c = Client()
        except Exception:
            print("this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes")
            raise

    try:
        if c is not None:
            if len(c) < n_processes:
                print(len(c))
                raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value")
            dview = c[:n_processes]  # use the number of processes

        Cin = np.array(Cin.todense())
        for _ in range(ITER):
            # Groups of components that can be updated simultaneously.
            O, lo = update_order(A.tocsc()[:, :nr])
            for count, jo_ in enumerate(O):
                jo = np.array(list(jo_))
                Ytemp = YrA[:, jo.flatten()] + Cin[jo, :].T
                Ctemp = np.zeros((np.size(jo), T))
                Stemp = np.zeros((np.size(jo), T))
                nT = nA[jo]

                args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj, None, None, None, None, kwargs)
                           for jj in range(len(jo))]

                if backend == 'ipyparallel':
                    if debug:
                        results = dview.map_async(constrained_foopsi_parallel, args_in)
                        results.get()
                        for outp in results.stdout:
                            print(outp[:-1])
                            sys.stdout.flush()
                        for outp in results.stderr:
                            print(outp[:-1])
                            sys.stderr.flush()
                    else:
                        results = dview.map_sync(constrained_foopsi_parallel, args_in)
                elif backend == 'single_thread':
                    results = [constrained_foopsi_parallel(args) for args in args_in]
                else:
                    raise Exception('Backend not defined. Use either single_thread or ipyparallel')

                # The per-neuron parameter dictionaries previously collected
                # here were never returned and indexed with a variable leaked
                # from the list comprehension above; they have been removed.
                for chunk in results:
                    C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk
                    Ctemp[jj_, :] = C_[None, :]
                    Stemp[jj_, :] = Sp_

                    bl[jo[jj_]] = cb_
                    c1[jo[jj_]] = c1_
                    sn[jo[jj_]] = sn_
                    g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else []

                # Propagate the change of this group to all residual traces.
                YrA -= (Ctemp - C[jo, :]).T * AA[jo, :]
                C[jo, :] = Ctemp.copy()
                S[jo, :] = Stemp

                print(str(np.sum(lo[:count + 1])) + ' out of total ' + str(nr) + ' temporal components updated')

            # Non-negative update of the background component.
            ii = nr
            cc = np.maximum(YrA[:, ii] + np.atleast_2d(Cin[ii, :]).T, 0)
            YrA -= (cc - np.atleast_2d(Cin[ii, :]).T) * AA[ii, :]
            C[ii, :] = cc.T

            if backend == 'ipyparallel':
                dview.results.clear()
                c.purge_results('all')
                c.purge_everything()

            if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm(C, 'fro') <= 1e-3:
                # stop if the overall temporal component does not change by much
                print("stopping: overall temporal component not changing significantly")
                break
            else:
                # NOTE(review): this rebinds Cin to the same array as C, so
                # from the second sweep on `Cin - C` is identically zero and
                # the test above always stops the loop - confirm whether a
                # copy was intended.
                Cin = C

        f = C[nr:, :]
        C = C[:nr, :]
        YrA = np.array(YrA[:, :nr]).T
    finally:
        # Do not leak the cluster connection when anything above raises.
        if c is not None:
            c.close()

    return C, f, S, bl, c1, sn, g, YrA
def run_CNMF_patches(file_name, shape, options, rf=16, stride=4,
                     n_processes=2, backend='single_thread', memory_fact=1):
    """Run CNMF on overlapping patches and stitch the results together.

    The patches are processed either sequentially or on an ipyparallel
    cluster (which must already be running for ``backend='ipyparallel'``).

    Parameters
    ----------
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie
    shape: tuple of three elements
        dimensions of the original movie across y, x, and time
    options: dictionary
        parameters for the various algorithms.  NOTE: it is modified in
        place: the ``backend`` and ``n_pixels_per_process`` entries of
        ``preprocess_params``, ``spatial_params`` and ``temporal_params``
        are overwritten so that each patch runs single threaded.
    rf: int
        half-size of the square patch in pixels
    stride: int
        amount of overlap between patches
    n_processes: int
        number of cores to be used (should not exceed the number of engines
        started with ipyparallel)
    backend: string
        'ipyparallel' or 'single_thread'
    memory_fact: double
        unitless number controlling memory usage: each process handles
        ``int(rf * rf / memory_fact)`` pixels at a time.  It has to be
        tuned experimentally.

    Returns
    -------
    A_tot: sparse matrix
        all the spatial components from all the patches, divided by the
        number of patches covering each pixel
    C_tot: np.ndarray
        calcium traces corresponding to the columns of A_tot
    b: np.ndarray
        rank-1 spatial background estimated from the per-patch backgrounds
    f: np.ndarray
        rank-1 temporal background estimated from the per-patch backgrounds
    sn_tot: np.ndarray
        per pixel noise estimate
    optional_outputs: dict
        per-patch outputs of CNMF (backgrounds, baselines, noise, indices...)

    Raises
    ------
    Exception
        if ``backend`` is not one of the supported values
    """
    # Strings must be compared by value: `is` only works by accident when
    # both strings happen to be interned.  Validate before doing any work.
    if backend not in ('ipyparallel', 'single_thread'):
        raise Exception('Backend unknown')

    (d1, d2, T) = shape
    d = d1 * d2
    K = options['init_params']['K']

    # every patch is processed single threaded; parallelism is across patches
    n_pixels_per_process = int((rf * rf) / memory_fact)
    for stage in ('preprocess_params', 'spatial_params', 'temporal_params'):
        options[stage]['backend'] = 'single_thread'
        options[stage]['n_pixels_per_process'] = n_pixels_per_process

    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)
    args_in = [(file_name, id_f, id_2d[0].shape, options)
               for id_f, id_2d in zip(idx_flat, idx_2d)]

    print(len(idx_flat))

    st = time.time()

    if backend == 'ipyparallel':
        client = None
        try:
            client = Client()
            dview = client[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)
            dview.results.clear()
            client.purge_results('all')
            client.purge_everything()
        except Exception:
            print('Something went wrong')
            raise
        finally:
            # always release the connection, also when the map failed
            if client is not None:
                client.close()
            print('You may think that it went well but reality is harsh')
    else:
        file_res = list(map(cnmf_patches, args_in))

    print(time.time() - st)

    # extract the values from the output of mapped computation
    num_patches = len(file_res)

    A_tot = scipy.sparse.csc_matrix((d, K * num_patches))
    B_tot = scipy.sparse.csc_matrix((d, num_patches))
    C_tot = np.zeros((K * num_patches, T))
    F_tot = np.zeros((num_patches, T))
    mask = np.zeros(d)  # number of patches covering each pixel
    sn_tot = np.zeros((d1 * d2))

    b_tot = []
    f_tot = []
    bl_tot = []
    c1_tot = []
    neurons_sn_tot = []
    g_tot = []
    idx_tot = []
    shapes_tot = []
    id_patch_tot = []

    count = 0  # number of non-empty components stored so far

    print('Transforming patches into full matrix')

    for patch_id, patch_res in enumerate(file_res):
        (idx_, shapes, A, b_patch, C, f_patch, S, bl, c1,
         neurons_sn, g, sn, _) = patch_res

        sn_tot[idx_] = sn
        b_tot.append(b_patch)
        f_tot.append(f_patch)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)
        mask[idx_] += 1
        F_tot[patch_id, :] = f_patch
        B_tot[idx_, patch_id] = b_patch

        A_csc = A.tocsc()  # convert once per patch, not twice per component
        for ii in range(np.shape(A)[-1]):
            column = A_csc[:, ii]
            # unit-norm component; an all-zero column gives NaNs, which fail
            # the test below and are therefore skipped
            new_comp = column / np.sqrt(np.sum(np.array(column.todense())**2))
            if new_comp.sum() > 0:
                A_tot[idx_, count] = new_comp
                C_tot[count, :] = C[ii, :]
                id_patch_tot.append(patch_id)
                count += 1

    A_tot = A_tot[:, :count]
    C_tot = C_tot[:count, :]

    optional_outputs = dict()
    optional_outputs['b_tot'] = b_tot
    optional_outputs['f_tot'] = f_tot
    optional_outputs['bl_tot'] = bl_tot
    optional_outputs['c1_tot'] = c1_tot
    optional_outputs['neurons_sn_tot'] = neurons_sn_tot
    optional_outputs['g_tot'] = g_tot
    optional_outputs['idx_tot'] = idx_tot
    optional_outputs['shapes_tot'] = shapes_tot
    optional_outputs['id_patch_tot'] = id_patch_tot
    optional_outputs['B'] = B_tot
    optional_outputs['F'] = F_tot
    optional_outputs['mask'] = mask

    # average overlapping patches: scale every pixel by 1 / (#patches on it)
    Im = scipy.sparse.csr_matrix((1. / mask, (np.arange(d), np.arange(d))))
    Bm = Im.dot(B_tot)
    A_tot = Im.dot(A_tot)

    # alternating rank-1 fit of the background, Bm * F_tot ~ b * f
    f = np.mean(F_tot, axis=0)
    for _ in range(10):
        b = Bm.dot(F_tot.dot(f)) / np.sum(f**2)
        f = np.dot((Bm.T.dot(b)).T, F_tot) / np.sum(b**2)

    return A_tot, C_tot, b, f, sn_tot, optional_outputs
def update_spatial_components(Y, C, f, A_in, sn=None, d1=None, d2=None,
                              min_size=3, max_size=8, dist=3,
                              method='ellipse', expandCore=None,
                              backend='single_thread', n_processes=4,
                              n_pixels_per_process=128):
    """Update spatial footprints and background through Basis Pursuit Denoising.

    For each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components.

    Parameters
    ----------
    Y: np.ndarray (2D) or str
        movie, raw data in 2D (pixels x time), or the name of a file that
        `load_memmap` can open.
    C: np.ndarray
        calcium activity of each neuron (neurons x time).
    f: np.ndarray
        temporal profile of background activity.
    A_in: np.ndarray
        current spatial components (pixels x neurons), used to determine
        where each component is searched for.
    sn: array-like
        noise associated with each pixel.  The single_thread backend
        indexes it per pixel, so it is required there.
    d1: int
        x movie dimension
    d2: int
        y movie dimension
    min_size: [optional] int
    max_size: [optional] int
    dist: [optional] int
        passed, with `method` and `expandCore`, to
        `determine_search_location`.
    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'
    expandCore: [optional] scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion
    backend: [optional] str
        'ipyparallel' or 'single_thread'.
        single_thread: no parallelization. It can be used with small datasets.
        ipyparallel: uses an ipython cluster and sends jobs to its engines.
    n_processes: [optional] int
        number of engines to use when the backend is ipyparallel
    n_pixels_per_process: [optional] int
        number of pixels to be processed by each job

    Returns
    --------
    A: scipy.sparse.coo_matrix
        new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray
        temporal components (updated only when spatial components are
        completely removed)

    Raises
    ------
    Exception
        on missing dimensions, wrongly shaped inputs, an unknown backend,
        too few cluster engines, or when the temporary folder cannot be
        removed.
    """
    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')

    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1),
                                       2).astype(int)

    # A file name has no `ndim`/`shape`: map it into memory before any of
    # the array checks below (previously this path raised AttributeError).
    Y_name = None
    if isinstance(Y, str):
        Y_name = Y
        Y, _, _, _ = load_memmap(Y_name)
    elif Y.ndim < 2:
        Y = np.atleast_2d(Y)

    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.'
        )

    nr, _ = np.shape(C)  # number of neurons

    IND = determine_search_location(A_in, d1, d2, method=method,
                                    min_size=min_size, max_size=max_size,
                                    dist=dist, expandCore=expandCore)
    print(" find search location")

    # per pixel: indices of candidate components plus the background rows
    ind2_ = [
        np.hstack((np.where(iid_)[0], nr + np.arange(f.shape[0])))
        if np.size(np.where(iid_)[0]) > 0 else [] for iid_ in IND
    ]

    folder = tempfile.mkdtemp()

    # use the ipyparallel package, you need to start a cluster server
    # (ipcluster command) in order to use it
    if backend == 'ipyparallel':

        C_name = os.path.join(folder, 'C_temp.npy')
        np.save(C_name, Cf)

        if Y_name is None:
            if isinstance(Y, np.memmap):
                # input is already memory mapped: reuse its file
                Y_name = Y.filename
            else:
                # create a memory mapped version (necessary for
                # parallelization)
                Y_name = os.path.join(folder, 'Y_temp.npy')
                np.save(Y_name, Y)
                Y, _, _, _ = load_memmap(Y_name)

        # Group the pixels to be processed by each job.  The last, shorter
        # group covers the pixels left over when the number of pixels is not
        # a multiple of n_pixels_per_process (they used to be skipped, which
        # left their rows of A_ at zero).
        n_pixels = d1 * d2
        index_groups = [
            list(range(i, i + n_pixels_per_process))
            for i in range(0, n_pixels - n_pixels_per_process + 1,
                           n_pixels_per_process)
        ]
        pixels_remaining = n_pixels % n_pixels_per_process
        if pixels_remaining > 0:
            index_groups.append(
                list(range(n_pixels - pixels_remaining, n_pixels)))
        pixel_groups = [(Y_name, C_name, sn, ind2_, idxs)
                        for idxs in index_groups]

        A_ = np.zeros((d, nr + np.size(f, 0)))

        try:  # fail with a hint if ipyparallel is missing or not started
            from ipyparallel import Client
            client = Client()
        except Exception:
            print(
                "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            )
            raise

        try:
            if len(client) < n_processes:
                print(len(client))
                raise Exception(
                    "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
                )

            dview = client[:n_processes]  # use the number of processes
            parallel_result = dview.map_sync(
                lars_regression_noise_ipyparallel, pixel_groups)

            for chunk in parallel_result:
                for px, idxs_, a in chunk:
                    A_[px, idxs_] = a

            # clean up
            dview.results.clear()
            client.purge_results('all')
            client.purge_everything()
        finally:
            # release the connection even when the computation failed
            client.close()

    elif backend == 'single_thread':

        Cf_ = [Cf[idx_, :] for idx_ in ind2_]

        #% LARS regression
        A_ = np.zeros((d, nr + np.size(f, 0)))

        for px, (cf_px, y_px, noise_px, idx_px) in enumerate(
                zip(Cf_, Y, sn, ind2_)):
            if px % 1000 == 0:
                print(px)
            if np.size(cf_px) > 0:
                _, _, a, _, _ = lars_regression_noise(
                    y_px, np.array(cf_px.T), 1, noise_px**2 * T)
                if np.isscalar(a):
                    A_[px, idx_px] = a
                else:
                    A_[px, idx_px] = a.T

    else:
        raise Exception(
            'Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel'
        )

    #%
    print('Updated Spatial Components')

    A_ = threshold_components(A_, d1, d2)

    print("threshold")
    ff = np.where(np.sum(A_, axis=0) == 0)  # remove empty components
    if np.size(ff) > 0:
        ff = ff[0]
        print('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_, list(ff), 1)
        C = np.delete(C, list(ff), 0)

    A_ = A_[:, :nr]
    A_ = coo_matrix(A_)

    Y_resf = np.dot(Y, f.T) - A_.dot(coo_matrix(C[:nr, :]).dot(f.T))
    print("Computing A_bas")
    # update baseline based on residual
    A_bas = np.fmax(Y_resf / scipy.linalg.norm(f)**2, 0)
    b = A_bas

    print("--- %s seconds ---" % (time.time() - start_time))

    try:  # clean up
        # remove temporary file created
        print("Remove temporary file created")
        shutil.rmtree(folder)
    except Exception:
        raise Exception("Failed to delete: " + folder)

    return A_, b, C
def update_temporal_components(Y, A, b, Cin, fin, bl=None, c1=None, g=None,
                               sn=None, ITER=2,
                               method_foopsi='constrained_foopsi',
                               n_processes=1, backend='single_thread',
                               memory_efficient=False, debug=False, **kwargs):
    """Update temporal components and background given spatial components.

    A block coordinate descent approach is used: the components are split
    into groups by `update_order` and each group is deconvolved with
    `constrained_foopsi_parallel`.

    Parameters
    -----------
    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix
        matrix of spatial components (d x K)
    b: ndarray (d x 1)
        current estimate of background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    bl: np.ndarray
        baseline for fluorescence trace for each column in A
    c1: np.ndarray
        initial concentration for each column in A
    g: np.ndarray
        discrete time constant for each column in A
    sn: np.ndarray
        noise level for each column in A
    ITER: positive integer
        Maximum number of block coordinate descent loops.
    method_foopsi: string
        Method of deconvolution of neural activity. Currently not read by
        this function: constrained_foopsi is always used.
    n_processes: int
        number of processes to use for parallel computation. Should not
        exceed the number of engines started with ipcluster.
    backend: 'str'
        single_thread: no parallelization
        ipyparallel: parallelization using the ipyparallel cluster. You
        should start the cluster (install ipyparallel and then type
        ipcluster -n 6, where 6 is the number of processes).
    memory_efficient: Bool
        Currently not read by this function.
    debug: Bool
        with the ipyparallel backend, run asynchronously and echo the
        engines' stdout/stderr.
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl,c1,g,sn (see
        documentation). Some useful parameters are
    p: int
        order of the autoregression model (required)
    method: [optional] string
        solution method for constrained foopsi. Choices are
            'cvx':      using cvxopt and picos (slow especially without the MOSEK solver)
            'cvxpy':    using cvxopt and cvxpy with the ECOS solver (faster, default)
    solvers: list string
        primary and secondary (if problem unfeasible for approx solution)
        solvers to be used with cvxpy, default is ['ECOS','SCS']

    Note
    --------
    The temporal components are updated in parallel by default by forming
    a sequence of vertex covers.  The current values of `bl`, `c1`, `g` and
    `sn` are not forwarded to the deconvolution; the arrays (also when
    supplied by the caller) are overwritten in place with the new
    per-component estimates.

    Returns
    --------
    C:   np.ndarray
        matrix of temporal components (K x T)
    f:   np.array
        vector of temporal background (length T)
    S:   np.ndarray
        matrix of merged deconvolved activity (spikes) (K x T)
    bl:  np.ndarray
        baseline estimated for each component
    c1:  np.ndarray
        initial concentration estimated for each component
    sn:  np.ndarray
        noise level estimated for each component
    g:   np.ndarray
        time constants estimated for each component
    YrA: np.ndarray
        matrix of spatial component filtered raw data, after all
        contributions have been removed. YrA corresponds to the residual
        trace for each component and is used for faster plotting (K x T)

    Raises
    ------
    Exception
        if `p` is missing, the backend is unknown, or the cluster has fewer
        engines than `n_processes`.
    """
    if kwargs.get('p') is None:
        raise Exception("You have to provide a value for p")

    _, T = np.shape(Y)
    nr = np.shape(A)[-1]

    if bl is None:
        bl = np.repeat(None, nr)

    if c1 is None:
        c1 = np.repeat(None, nr)

    if g is None:
        g = np.repeat(None, nr)

    if sn is None:
        sn = np.repeat(None, nr)

    A = scipy.sparse.hstack((A, coo_matrix(b)))
    S = np.zeros(np.shape(Cin))
    Cin = np.vstack((Cin, fin))
    C = Cin
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0)))

    Cin = coo_matrix(Cin)
    inv_norms = spdiags(1. / nA, 0, nr + 1, nr + 1)
    YA = (A.T.dot(Y).T) * inv_norms
    AA = ((A.T.dot(A)) * inv_norms).tocsr()
    YrA = YA - Cin.T.dot(AA)

    client = None
    dview = None
    if backend == 'ipyparallel':
        try:  # fail with a hint if ipyparallel is missing or not started
            from ipyparallel import Client
            client = Client()
        except Exception:
            print(
                "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            )
            raise

    P_ = []  # defined even when ITER == 0
    try:
        if client is not None:
            if len(client) < n_processes:
                print(len(client))
                raise Exception(
                    "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
                )
            dview = client[:n_processes]  # use the number of processes

        Cin = np.array(Cin.todense())
        for _ in range(ITER):
            O, lo = update_order(A.tocsc()[:, :nr])
            P_ = []
            for count, jo_ in enumerate(O):
                jo = np.array(list(jo_))
                Ytemp = YrA[:, jo.flatten()] + Cin[jo, :].T
                Ctemp = np.zeros((np.size(jo), T))
                Stemp = np.zeros((np.size(jo), T))
                gtemp = np.zeros((np.size(jo), kwargs['p']))
                nT = nA[jo]

                # previous estimates of bl, c1, g and sn are deliberately
                # not forwarded (None): they are re-estimated on every call
                args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj,
                            None, None, None, None, kwargs)
                           for jj in range(len(jo))]

                if backend == 'ipyparallel':
                    if debug:
                        results = dview.map_async(
                            constrained_foopsi_parallel, args_in)
                        results.get()
                        for outp in results.stdout:
                            print(outp[:-1])
                            sys.stdout.flush()
                        for outp in results.stderr:
                            print(outp[:-1])
                            sys.stderr.flush()
                    else:
                        results = dview.map_sync(
                            constrained_foopsi_parallel, args_in)
                elif backend == 'single_thread':
                    results = map(constrained_foopsi_parallel, args_in)
                else:
                    raise Exception(
                        'Backend not defined. Use either single_thread or ipyparallel'
                    )

                for chunk in results:
                    C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk
                    Ctemp[jj_, :] = C_[None, :]
                    Stemp[jj_, :] = Sp_
                    Ytemp[:, jj_] = Ytemp_[:, None]
                    gtemp[jj_, :] = gn_.T

                    bl[jo[jj_]] = cb_
                    c1[jo[jj_]] = c1_
                    sn[jo[jj_]] = sn_
                    g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else []

                    pars = dict()
                    pars['b'] = cb_
                    pars['c1'] = c1_
                    pars['neuron_sn'] = sn_
                    # mask with this component's own row (jj_); the old code
                    # used `jj`, a variable leaked from the comprehension
                    pars['gn'] = gtemp[jj_, np.abs(gtemp[jj_, :]) > 0]
                    pars['neuron_id'] = jo[jj_]
                    P_.append(pars)

                YrA -= (Ctemp - C[jo, :]).T * AA[jo, :]
                C[jo, :] = Ctemp.copy()
                S[jo, :] = Stemp

                print(str(np.sum(lo[:count + 1])) + ' out of total ' +
                      str(nr) + ' temporal components updated')

            # update the background component (last row)
            ii = nr
            cc = np.maximum(YrA[:, ii] + np.atleast_2d(Cin[ii, :]).T, 0)
            YrA -= (cc - np.atleast_2d(Cin[ii, :]).T) * AA[ii, :]
            C[ii, :] = cc.T

            if backend == 'ipyparallel':
                dview.results.clear()
                client.purge_results('all')
                client.purge_everything()

            # stop if the overall temporal component does not change by much
            if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm(
                    C, 'fro') <= 1e-3:
                print(
                    "stopping: overall temporal component not changing significantly"
                )
                break
            else:
                # Keep a snapshot, not an alias: C is updated in place, so
                # `Cin = C` made the test above compare C with itself and
                # stop unconditionally after the second sweep.
                Cin = C.copy()
    finally:
        # release the connection even when the computation failed
        if client is not None:
            client.close()

    f = C[nr:, :]
    C = C[:nr, :]
    YrA = np.array(YrA[:, :nr]).T
    P_ = sorted(P_, key=lambda k: k['neuron_id'])

    return C, f, S, bl, c1, sn, g, YrA  #,P_
# Simulation driver: for every trend specification run EX_NUM experiments
# through the parallel view and store percentiles of the simulated values.
# NOTE(review): T, trends, EX_NUM, EX_SIZE, wrapper, lview and rc are defined
# outside this excerpt; lview/rc look like an ipyparallel view and its
# client -- confirm against the script's setup code.
m = T.shape[0]
# percentile levels 0.5, 1.0, ..., 99.5
percentiles = list(arange(0.5, 100.0, 0.5))
# fixed seed so that the per-experiment seeds are reproducible
rng = RandomState(0)
seeds = rng.random_integers(0, 2 ** 31 - 2, size=EX_NUM)

for tr in trends:
    # NaN-filled so that experiments that have not run yet are recognizable
    results = zeros((len(percentiles), len(T), EX_NUM)) * nan
    filename = 'adf_z_' + tr + '.npz'

    for i in range(EX_NUM):
        print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
        # Non parallel version
        # out = lmap(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m))
        now = datetime.datetime.now()
        # one task per entry of T, all sharing this experiment's seed
        out = lview.map_sync(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
        # Prevent unnecessary results from accumulating
        lview.purge_results('all')
        rc.purge_everything()
        # elapsed wall-clock time of this experiment
        print(datetime.datetime.now() - now)
        quantiles = lmap(lambda x: percentile(x, percentiles), out)
        results[:, :, i] = array(quantiles).T

        # checkpoint every 50 experiments so a crash loses little work
        if i % 50 == 0:
            savez(filename, trend=tr, results=results, percentiles=percentiles, T=T)

    # final save once all experiments for this trend are done
    savez(filename, trend=tr, results=results, percentiles=percentiles, T=T)
def extract_rois_patch(file_name,d1,d2,rf=5,stride = 2):
    """Work-in-progress: NMF on patches followed by merging and a CNMF refinement.

    NOTE(review): this function is intentionally disabled. Its first statement
    references the undefined names `not_completed` and `in_progress`, so every
    call raises NameError before any work is done. Everything below that line
    is experimental code that cannot currently run; the comments describe what
    the statements do, not a validated contract.

    Parameters
    ----------
    file_name: passed unchanged to `nmf_patches` for every patch
    d1, d2: movie dimensions, used to build the patches and size the outputs
    rf, stride: patch parameters; currently overwritten with 6 and 2

    Returns
    -------
    A1, A2, C1: A1 and C1 are returned as initialised (empty), A2 is the
        output of the spatial update
    """
    # deliberate guard: undefined names -> NameError on every call
    not_completed, in_progress
    # hard-coded values override the arguments
    rf=6
    stride = 2
    idx_flat,idx_2d=extract_patch_coordinates(d1, d2, rf=rf,stride = stride)
    perctl=95
    n_components=2
    tol=1e-6
    max_iter=5000
    args_in=[]
    # one argument tuple per patch
    for id_f,id_2d in zip(idx_flat,idx_2d):
        args_in.append((file_name, id_f,id_2d[0].shape, perctl,n_components,tol,max_iter))
    st=time.time()
    try:
        # `if 1` hard-wires the ipyparallel path; the serial map is dead code
        if 1:
            c = Client()
            dview=c[:]
            file_res = dview.map_sync(nmf_patches, args_in)
        else:
            file_res = map(nmf_patches, args_in)
    finally:
        # NOTE(review): `dview` and `c` are unbound here if Client() raised
        dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()
    print time.time()-st
    # A1/C1 are never filled: the code doing so is commented out below
    A1=lil_matrix((d1*d2,len(file_res)))
    C1=[]
    A2=lil_matrix((d1*d2,len(file_res)))
    C2=[]
    A_tot=lil_matrix((d1*d2,n_components*len(file_res)))
    C_tot=[];
    count_out=0
    for count,f in enumerate(file_res):
        idx_,flt,ca,d=f
        print count_out
        #flt,ca,_=cse.order_components(coo_matrix(flt),ca)
#        A1[idx_,count]=flt[:,0][:,np.newaxis]/np.sqrt(np.sum(flt[:,0]**2))
#        A2[idx_,count]=flt[:,1][:,np.newaxis] /np.sqrt(np.sum(flt[:,1]**2))
#        C1.append(ca[0,:])
#        C2.append(ca[1,:])
        # store every component of the patch as a unit-norm column of A_tot
        for ccc in range(n_components):
            A_tot[idx_,count_out]=flt[:,ccc][:,np.newaxis]/np.sqrt(np.sum(flt[:,ccc]**2))
            C_tot.append(ca[ccc,:])
            count_out+=1
#        pl.imshow(np.reshape(flt[:,0],d,order='F'),vmax=10)
#        pl.pause(.1)
    # show which pairs of components are both correlated and close together
    correlations=np.corrcoef(np.array(C_tot))
    centers=cse.com(A_tot.todense(),d1,d2)
    distances=sklearn.metrics.pairwise.euclidean_distances(centers)
    pl.imshow((correlations>0.8) & (distances<10))
    # NOTE(review): hard-coded input file, independent of `file_name`
    Yr=np.load('Yr.npy',mmap_mode='r')
    [d,T]=Yr.shape
    Y=np.reshape(Yr,(d1,d2,T),order='F')
    options=cse.utilities.CNMFSetParms(Y,p=0)
    res_merge=cse.merge_components(Yr,A_tot,[],np.array(C_tot),[],np.array(C_tot),[],options['temporal_params'],options['spatial_params'],thr=0.8)
    A_m,C_m,nr_m,merged_ROIs,S_m,bl_m,c1_m,sn_m,g_m=res_merge
    # unit-norm copy of every merged component
    A_norm=np.array([A_m[:,rr].toarray()/np.sqrt(np.sum(A_m[:,rr].toarray()**2)) for rr in range(A_m.shape[-1])]).T
    options=cse.utilities.CNMFSetParms(Y,p=2,K=np.shape(A_m)[-1])
    Yr,sn,g=cse.pre_processing.preprocess_data(Yr,**options['preprocess_params'])
    epsilon=1e-2
    # pixels with (almost) no component weight are treated as background
    pixels_bckgrnd=np.nonzero(A_norm.sum(axis=-1)<epsilon)[0]
    f=np.sum(Yr[pixels_bckgrnd,:],axis=0)
    A2,b2,C2 = cse.spatial.update_spatial_components(Yr, C_m, f, A_m, sn=sn, **options['spatial_params'])
    A_or2, C_or2, srt2 = cse.utilities.order_components(A2,C2)
    A_norm2=np.array([A_or2[:,rr]/np.sqrt(np.sum(A_or2[:,rr]**2)) for rr in range(A_or2.shape[-1])]).T
    options['temporal_params']['p'] = 2 # set it back to original value to perform full deconvolution
    C2,f2,S2,bl2,c12,neurons_sn2,g21,YrA = cse.temporal.update_temporal_components(Yr,A2,b2,C2,f,bl=None,c1=None,sn=None,g=None,**options['temporal_params'])
    A_or, C_or, srt = cse.utilities.order_components(A2,C2)
    return A1,A2,C1
def update_spatial_components(Y, C, f, A_in, sn=None, d1=None, d2=None,
                              min_size=3, max_size=8, dist=3,
                              method='ellipse', expandCore=None,
                              backend='single_thread', n_processes=4,
                              n_pixels_per_process=128):
    """Update spatial footprints and background through Basis Pursuit Denoising.

    For each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components.

    Parameters
    ----------
    Y: np.ndarray (2D) or str
        movie, raw data in 2D (pixels x time), or the name of a file that
        `load_memmap` can open.
    C: np.ndarray
        calcium activity of each neuron (neurons x time).
    f: np.ndarray
        temporal profile of background activity.
    A_in: np.ndarray
        current spatial components (pixels x neurons), used to determine
        where each component is searched for.
    sn: array-like
        noise associated with each pixel.  The single_thread backend
        indexes it per pixel, so it is required there.
    d1: int
        x movie dimension
    d2: int
        y movie dimension
    min_size: [optional] int
    max_size: [optional] int
    dist: [optional] int
        passed, with `method` and `expandCore`, to
        `determine_search_location`.
    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'
    expandCore: [optional] scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion
    backend: [optional] str
        'ipyparallel' or 'single_thread'.
        single_thread: no parallelization. It can be used with small datasets.
        ipyparallel: uses an ipython cluster and sends jobs to its engines.
    n_processes: [optional] int
        number of engines to use when the backend is ipyparallel
    n_pixels_per_process: [optional] int
        number of pixels to be processed by each job

    Returns
    --------
    A: scipy.sparse.coo_matrix
        new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray
        temporal components (updated only when spatial components are
        completely removed)

    Raises
    ------
    Exception
        on missing dimensions, wrongly shaped inputs, an unknown backend,
        too few cluster engines, or when the temporary folder cannot be
        removed.
    """
    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')

    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1),
                                       2).astype(int)

    # A file name has no `ndim`/`shape`: map it into memory before any of
    # the array checks below (previously this path raised AttributeError).
    Y_name = None
    if isinstance(Y, str):
        Y_name = Y
        Y, _, _, _ = load_memmap(Y_name)
    elif Y.ndim < 2:
        Y = np.atleast_2d(Y)

    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.'
        )

    nr, _ = np.shape(C)  # number of neurons

    IND = determine_search_location(A_in, d1, d2, method=method,
                                    min_size=min_size, max_size=max_size,
                                    dist=dist, expandCore=expandCore)
    print(" find search location")

    # per pixel: indices of candidate components plus the background rows
    ind2_ = [
        np.hstack((np.where(iid_)[0], nr + np.arange(f.shape[0])))
        if np.size(np.where(iid_)[0]) > 0 else [] for iid_ in IND
    ]

    folder = tempfile.mkdtemp()

    # use the ipyparallel package, you need to start a cluster server
    # (ipcluster command) in order to use it
    if backend == 'ipyparallel':

        C_name = os.path.join(folder, 'C_temp.npy')
        np.save(C_name, Cf)

        if Y_name is None:
            if isinstance(Y, np.memmap):
                # input is already memory mapped: reuse its file
                Y_name = Y.filename
            else:
                # create a memory mapped version (necessary for
                # parallelization)
                Y_name = os.path.join(folder, 'Y_temp.npy')
                np.save(Y_name, Y)
                Y, _, _, _ = load_memmap(Y_name)

        # Group the pixels to be processed by each job.  The last, shorter
        # group covers the pixels left over when the number of pixels is not
        # a multiple of n_pixels_per_process (they used to be skipped, which
        # left their rows of A_ at zero).
        n_pixels = d1 * d2
        index_groups = [
            list(range(i, i + n_pixels_per_process))
            for i in range(0, n_pixels - n_pixels_per_process + 1,
                           n_pixels_per_process)
        ]
        pixels_remaining = n_pixels % n_pixels_per_process
        if pixels_remaining > 0:
            index_groups.append(
                list(range(n_pixels - pixels_remaining, n_pixels)))
        pixel_groups = [(Y_name, C_name, sn, ind2_, idxs)
                        for idxs in index_groups]

        A_ = np.zeros((d, nr + np.size(f, 0)))

        try:  # fail with a hint if ipyparallel is missing or not started
            from ipyparallel import Client
            client = Client()
        except Exception:
            print(
                "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            )
            raise

        try:
            if len(client) < n_processes:
                print(len(client))
                raise Exception(
                    "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
                )

            dview = client[:n_processes]  # use the number of processes
            parallel_result = dview.map_sync(
                lars_regression_noise_ipyparallel, pixel_groups)

            for chunk in parallel_result:
                for px, idxs_, a in chunk:
                    A_[px, idxs_] = a

            # clean up
            dview.results.clear()
            client.purge_results('all')
            client.purge_everything()
        finally:
            # release the connection even when the computation failed
            client.close()

    elif backend == 'single_thread':

        Cf_ = [Cf[idx_, :] for idx_ in ind2_]

        #% LARS regression
        A_ = np.zeros((d, nr + np.size(f, 0)))

        for px, (cf_px, y_px, noise_px, idx_px) in enumerate(
                zip(Cf_, Y, sn, ind2_)):
            if px % 1000 == 0:
                print(px)
            if np.size(cf_px) > 0:
                _, _, a, _, _ = lars_regression_noise(
                    y_px, np.array(cf_px.T), 1, noise_px**2 * T)
                if np.isscalar(a):
                    A_[px, idx_px] = a
                else:
                    A_[px, idx_px] = a.T

    else:
        raise Exception(
            'Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel'
        )

    #%
    print('Updated Spatial Components')

    A_ = threshold_components(A_, d1, d2)

    print("threshold")
    ff = np.where(np.sum(A_, axis=0) == 0)  # remove empty components
    if np.size(ff) > 0:
        ff = ff[0]
        print('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_, list(ff), 1)
        C = np.delete(C, list(ff), 0)

    A_ = A_[:, :nr]
    A_ = coo_matrix(A_)

    Y_resf = np.dot(Y, f.T) - A_.dot(coo_matrix(C[:nr, :]).dot(f.T))
    print("Computing A_bas")
    # update baseline based on residual
    A_bas = np.fmax(Y_resf / scipy.linalg.norm(f)**2, 0)
    b = A_bas

    print("--- %s seconds ---" % (time.time() - start_time))

    try:  # clean up
        # remove temporary file created
        print("Remove temporary file created")
        shutil.rmtree(folder)
    except Exception:
        raise Exception("Failed to delete: " + folder)

    return A_, b, C
def update_spatial_components_parallel(Y, C, f, A_in, sn=None, d1=None,
                                       d2=None, min_size=3, max_size=8,
                                       dist=3, method='ellipse',
                                       expandCore=None,
                                       backend='single_thread',
                                       n_processes=4,
                                       n_pixels_per_process=128,
                                       memory_efficient=False):
    """Update spatial footprints and background through Basis Pursuit Denoising.

    For each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components.

    Parameters
    ----------
    Y: np.ndarray (2D)
        movie, raw data in 2D (pixels x time).
    C: np.ndarray
        calcium activity of each neuron (neurons x time).
    f: np.ndarray
        temporal profile of background activity.
    A_in: np.ndarray
        current spatial components (pixels x neurons), used to determine
        where each component is searched for.
    sn: array-like
        noise associated with each pixel.  The single_thread backend
        indexes it per pixel, so it is required there.
    d1: int
        x movie dimension
    d2: int
        y movie dimension
    min_size: [optional] int
    max_size: [optional] int
    dist: [optional] int
        passed, with `method` and `expandCore`, to
        `determine_search_location`.
    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'
    expandCore: [optional] scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion
    backend: [optional] str
        'multiprocessing', 'threading', 'ipyparallel', 'single_thread'
        single_thread: no parallelization. It should be used in most cases.
        multiprocessing or threading: use the corresponding python threading
        package. It has known issues on mac OS. Not to be used in most
        situations.
        ipyparallel: uses an ipython cluster and sends jobs to its engines.
    n_processes: [optional] int
        number of workers to use when the backend is multiprocessing,
        threading, or ipyparallel
    n_pixels_per_process: [optional] int
        number of pixels to be processed by each job
    memory_efficient: [optional] bool
        whether or not to reduce memory usage (at the expense of increased
        computational time)

    Returns
    --------
    A: scipy.sparse.coo_matrix
        new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray
        temporal components (updated only when spatial components are
        completely removed)

    Raises
    ------
    Exception
        on missing dimensions, wrongly shaped inputs, an unknown backend,
        too few cluster engines, or when the temporary folder cannot be
        removed.
    """
    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')

    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1),
                                       2).astype(int)

    Y = np.atleast_2d(Y)
    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.'
        )

    nr, _ = np.shape(C)  # number of neurons

    IND = determine_search_location(A_in, d1, d2, method=method,
                                    min_size=min_size, max_size=max_size,
                                    dist=dist, expandCore=expandCore)
    print(" find search location")

    # per pixel: indices of candidate components plus the background rows
    ind2_ = [
        np.hstack((np.where(iid_)[0], nr + np.arange(f.shape[0])))
        if np.size(np.where(iid_)[0]) > 0 else [] for iid_ in IND
    ]

    folder = tempfile.mkdtemp()

    if backend == 'multiprocessing' or backend == 'threading':

        A_name = os.path.join(folder, 'A_temp')

        # Pre-allocate a writeable shared memory map as a container for the
        # results of the parallel computation
        print(
            "Create Matrix for dumping data from matrix A and C for parallel computation...."
        )
        A_ = np.memmap(A_name, dtype=A_in.dtype,
                       shape=(d, nr + np.size(f, 0)), mode='w+')

        pixels_name = os.path.join(folder, 'pixels')
        C_name = os.path.join(folder, 'C_temp')

        # Dump the input data to disk to free the memory
        dump(Y, pixels_name)
        dump(Cf, C_name)

        # use memory mapped versions of C and Y
        Y = load(pixels_name, mmap_mode='r')
        Cf = load(C_name, mmap_mode='r')

        pixel_groups = [
            list(range(i, i + n_pixels_per_process))
            for i in range(0, Y.shape[0] - n_pixels_per_process + 1,
                           n_pixels_per_process)
        ]

        # Fork the worker processes to perform computation concurrently
        print("start parallel pool...")
        sys.stdout.flush()
        Parallel(n_jobs=n_processes, backend=backend, verbose=100,
                 max_nbytes=None)(
                     delayed(lars_regression_noise_parallel)(
                         Y, Cf, A_, sn, i, ind2_) for i in pixel_groups)

        # if the number of pixels is not a multiple of n_pixels_per_process
        # run on the remaining pixels
        pixels_remaining = Y.shape[0] % n_pixels_per_process
        if pixels_remaining > 0:
            print("Running deconvolution for remaining pixels:" +
                  str(pixels_remaining))
            lars_regression_noise_parallel(
                Y, Cf, A_, sn,
                list(range(Y.shape[0] - pixels_remaining, Y.shape[0])),
                ind2_, positive=1)
        A_ = np.array(A_)

    elif backend == 'ipyparallel':
        # use the ipyparallel package, you need to start a cluster server
        # (ipcluster command) in order to use it

        C_name = os.path.join(folder, 'C_temp.npy')
        np.save(C_name, Cf)

        if isinstance(Y, np.memmap):
            # input is already memory mapped: reuse its file
            Y_name = Y.filename
        else:
            # create a memory mapped version (necessary for parallelization)
            Y_name = os.path.join(folder, 'Y_temp.npy')
            np.save(Y_name, Y)
            Y = np.load(Y_name, mmap_mode='r')

        # Group the pixels to be processed by each job.  The last, shorter
        # group covers the pixels left over when the number of pixels is not
        # a multiple of n_pixels_per_process; this branch used to skip them
        # (unlike the multiprocessing branch), leaving their rows at zero.
        n_pixels = d1 * d2
        index_groups = [
            list(range(i, i + n_pixels_per_process))
            for i in range(0, n_pixels - n_pixels_per_process + 1,
                           n_pixels_per_process)
        ]
        pixels_remaining = n_pixels % n_pixels_per_process
        if pixels_remaining > 0:
            index_groups.append(
                list(range(n_pixels - pixels_remaining, n_pixels)))
        pixel_groups = [(Y_name, C_name, sn, ind2_, idxs)
                        for idxs in index_groups]

        A_ = np.zeros((d, nr + np.size(f, 0)))

        try:  # fail with a hint if ipyparallel is missing or not started
            from ipyparallel import Client
            client = Client()
        except Exception:
            print(
                "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            )
            raise

        try:
            if len(client) < n_processes:
                print(len(client))
                raise Exception(
                    "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
                )

            dview = client[:n_processes]  # use the number of processes
            parallel_result = dview.map_sync(
                lars_regression_noise_ipyparallel, pixel_groups)

            for chunk in parallel_result:
                for px, idxs_, a in chunk:
                    A_[px, idxs_] = a

            # clean up
            dview.results.clear()
            client.purge_results('all')
            client.purge_everything()
        finally:
            # release the connection even when the computation failed
            client.close()

    elif backend == 'single_thread':

        Cf_ = [Cf[idx_, :] for idx_ in ind2_]

        #% LARS regression
        A_ = np.zeros((d, nr + np.size(f, 0)))

        for px, (cf_px, y_px, noise_px, idx_px) in enumerate(
                zip(Cf_, Y, sn, ind2_)):
            if px % 1000 == 0:
                print(px)
            if np.size(cf_px) > 0:
                _, _, a, _, _ = lars_regression_noise(
                    y_px, np.array(cf_px.T), 1, noise_px**2 * T)
                if np.isscalar(a):
                    A_[px, idx_px] = a
                else:
                    A_[px, idx_px] = a.T

    else:
        raise Exception(
            'Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel'
        )

    #%
    print('Updated Spatial Components')

    A_ = threshold_components(A_, d1, d2)

    print("threshold")
    ff = np.where(np.sum(A_, axis=0) == 0)  # remove empty components
    if np.size(ff) > 0:
        ff = ff[0]
        warn('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_, list(ff), 1)
        C = np.delete(C, list(ff), 0)

    A_ = A_[:, :nr]
    A_ = coo_matrix(A_)

    if memory_efficient:
        print(
            "Using memory efficient computation (slow but memory preserving)")
        A__ = coo_matrix(A_, dtype=np.float32)
        C__ = coo_matrix(C[:nr, :], dtype=np.float32)
        Y_res_name = os.path.join(folder, 'Y_res_temp.npy')
        # 'w+' creates the file and maps it read/write in one step
        Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='w+',
                          shape=Y.shape)
        print("computing residuals")
        Y_res[:] = -A__.dot(C__).todense()[:]
        Y_res[:] += Y
    else:
        print(
            "Using memory trade-off computation (good use of memory if input is memmaped)"
        )
        Y_res = Y - A_.dot(coo_matrix(C[:nr, :]))

    print("Computing A_bas")
    # update baseline based on residual
    A_bas = np.fmax(np.dot(Y_res, f.T) / scipy.linalg.norm(f)**2, 0)
    # drop the residual (and its memory map) before the temporary folder is
    # removed; overwriting it with ones, as done before, achieved nothing
    del Y_res
    b = A_bas

    print("--- %s seconds ---" % (time.time() - start_time))

    try:  # clean up
        # remove temporary file created
        print("Remove temporary file created")
        shutil.rmtree(folder)
    except Exception:
        raise Exception("Failed to delete: " + folder)

    return A_, b, C
def run_CNMF_patches(file_name, shape, options, rf=16, stride=4, n_processes=2, backend='single_thread'):
    """Run CNMF on patches of a movie and assemble the per-patch results.

    The field of view is split into patches by `extract_patch_coordinates`,
    `cnmf_patches` is applied to every patch (in parallel or sequentially),
    and the spatial/temporal components of all patches are copied into
    full-size matrices.

    Parameters
    ----------
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie

    shape: tuple of three elements
        dimensions of the original movie across y, x, and time

    options: dictionary
        parameters for the various algorithms; `options['init_params']['K']`
        is read here and used to size the output matrices
        (K columns are reserved per patch)

    rf: int
        half-size of the square patch in pixel

    stride: int
        amount of overlap between patches

    n_processes: int
        number of ipyparallel engines to use (the first `n_processes`
        engines of the client); only used with the 'ipyparallel' backend

    backend: string
        'ipyparallel' (requires a running ipyparallel cluster) or
        'single_thread'

    Returns
    -------
    A_tot: scipy.sparse.csc_matrix, d1*d2 x n_components
        spatial components of all patches, each scaled to unit L2 norm

    C_tot: ndarray, n_components x T
        rows of the per-patch `C` matching the columns of A_tot

    sn_tot: ndarray, length d1*d2
        per-pixel `sn` values returned by the patches; where patches
        overlap the value of the patch processed last is kept

    optional_outputs: dictionary
        per-patch lists under the keys 'b_tot', 'f_tot', 'bl_tot', 'c1_tot',
        'neurons_sn_tot', 'g_tot', 'idx_tot', 'shapes_tot', plus
        'id_patch_tot', which holds for every retained component the index
        of the patch it came from

    Raises
    ------
    Exception
        if `backend` is neither 'ipyparallel' nor 'single_thread'
    """
    (d1, d2, T) = shape
    d = d1 * d2
    K = options['init_params']['K']

    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)

    # one argument tuple per patch: file, flat pixel indices, patch shape, options
    args_in = [(file_name, id_f, id_2d[0].shape, options)
               for id_f, id_2d in zip(idx_flat, idx_2d)]

    print(len(idx_flat))

    st = time.time()

    # Compare by value: `is` on strings only works by interning accident and
    # fails for equal strings that were built at runtime.
    if backend == 'ipyparallel':
        # Create the client outside the try block so that a connection failure
        # is reported as such instead of being masked by an unbound-name error
        # raised from the clean-up code.
        c = Client()
        try:
            dview = c[:n_processes]
            try:
                file_res = dview.map_sync(cnmf_patches, args_in)
            finally:
                dview.results.clear()
        finally:
            c.purge_results('all')
            c.purge_everything()
            c.close()
    elif backend == 'single_thread':
        # build a real list: len() is taken on the result below
        file_res = [cnmf_patches(patch_args) for patch_args in args_in]
    else:
        raise Exception('Backend unknown')

    print(time.time() - st)

    # extract the values from the output of mapped computation
    num_patches = len(file_res)

    A_tot = scipy.sparse.csc_matrix((d, K * num_patches))
    C_tot = np.zeros((K * num_patches, T))
    sn_tot = np.zeros((d1 * d2))
    b_tot = []
    f_tot = []
    bl_tot = []
    c1_tot = []
    neurons_sn_tot = []
    g_tot = []
    idx_tot = []
    shapes_tot = []
    id_patch_tot = []

    count = 0  # number of components written to A_tot / C_tot so far

    print('Transforming patches into full matrix')

    for patch_id, patch_res in enumerate(file_res):
        idx_, shapes, A, b, C, f, S, bl, c1, neurons_sn, g, sn, _ = patch_res

        sn_tot[idx_] = sn
        b_tot.append(b)
        f_tot.append(f)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)

        # convert once per patch instead of twice per component
        A_csc = A.tocsc()
        for ii in range(np.shape(A)[-1]):
            column = A_csc[:, ii]
            norm = np.sqrt(np.sum(np.array(column.todense()) ** 2))
            if norm == 0:
                # an empty component cannot be normalised; it is discarded
                continue
            new_comp = column / norm
            if new_comp.sum() > 0:
                A_tot[idx_, count] = new_comp
                C_tot[count, :] = C[ii, :]
                id_patch_tot.append(patch_id)
                count += 1

    # drop the columns/rows that were reserved but never filled
    A_tot = A_tot[:, :count]
    C_tot = C_tot[:count, :]

    optional_outputs = {
        'b_tot': b_tot,
        'f_tot': f_tot,
        'bl_tot': bl_tot,
        'c1_tot': c1_tot,
        'neurons_sn_tot': neurons_sn_tot,
        'g_tot': g_tot,
        'idx_tot': idx_tot,
        'shapes_tot': shapes_tot,
        'id_patch_tot': id_patch_tot,
    }

    return A_tot, C_tot, sn_tot, optional_outputs