Example #1
def motion_correct_parallel(file_names,fr,template=None,margins_out=0,max_shift_w=5, max_shift_h=5,remove_blanks=False,apply_smooth=True,backend='single_thread'):
    """motion correct many movies usingthe ipyparallel cluster
    Parameters
    ----------
    file_names: list of strings
        names of he files to be motion corrected
    fr: double
        fr parameters for calcblitz movie 
    margins_out: int
        number of pixels to remove from the borders    
    
    Return
    ------
    base file names of the motion corrected files
    """
    args_in=[];
    for f in file_names:
        args_in.append((f,fr,margins_out,template,max_shift_w, max_shift_h,remove_blanks,apply_smooth))
        
    try:
        
        if backend == 'ipyparallel':
            
            c = Client()   
            dview=c[:]
            file_res = dview.map_sync(process_movie_parallel, args_in)                         
            dview.results.clear()       
            c.purge_results('all')
            c.purge_everything()
            c.close()    

        elif backend == 'single_thread':
            
            file_res = map(process_movie_parallel, args_in)        
                 
        else:
            raise Exception('Unknown backend')
        
    except :   
        
        try:
            if backend == 'ipyparallel':
                
                dview.results.clear()       
                c.purge_results('all')
                c.purge_everything()
                c.close()
        except UnboundLocalError as uberr:

            print 'could not close client'

        raise
                                    
    return file_res
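
A minimal usage sketch for the function above (assumptions: the function is importable from a module called here `motion_correction`, an ipyparallel cluster has already been started with `ipcluster start -n 6`, and the file names are placeholders):

# hypothetical usage sketch, not part of the original source
from motion_correction import motion_correct_parallel   # assumed module name

file_names = ['movie_01.tif', 'movie_02.tif']            # placeholder inputs
base_names = motion_correct_parallel(file_names, fr=30,
                                     max_shift_w=5, max_shift_h=5,
                                     backend='ipyparallel')
print(base_names)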
Example #2
def extract_rois_patch(file_name, d1, d2, rf=5, stride=5):
    """Extract ROIs by running NMF on overlapping spatial patches of the movie (in parallel through ipyparallel)."""
    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)
    perctl = 95
    n_components = 2
    tol = 1e-6
    max_iter = 5000
    args_in = []
    for id_f, id_2d in zip(idx_flat, idx_2d):
        args_in.append((file_name, id_f, id_2d[0].shape, perctl, n_components,
                        tol, max_iter))
    st = time.time()
    print len(idx_flat)
    try:
        if 1:
            c = Client()
            dview = c[:]
            file_res = dview.map_sync(nmf_patches, args_in)
        else:
            file_res = map(nmf_patches, args_in)
    finally:
        dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    print time.time() - st

    A1 = lil_matrix((d1 * d2, len(file_res)))
    C1 = []
    A2 = lil_matrix((d1 * d2, len(file_res)))
    C2 = []
    for count, f in enumerate(file_res):
        idx_, flt, ca, d = f
        #flt,ca,_=cse.order_components(coo_matrix(flt),ca)
        A1[idx_, count] = flt[:, 0][:, np.newaxis]
        A2[idx_, count] = flt[:, 1][:, np.newaxis]
        C1.append(ca[0, :])
        C2.append(ca[1, :])
#        pl.imshow(np.reshape(flt[:,0],d,order='F'),vmax=10)
#        pl.pause(.1)

    return A1, A2, C1, C2
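
The pattern shared by these examples (connect a Client, map the work over a view, then purge cached results and close even on failure) can be factored into a small helper; this is a sketch under the assumption that the worker function and its argument tuples are picklable:

# generic ipyparallel map-with-cleanup sketch (not part of the original source)
from ipyparallel import Client

def map_on_cluster(func, args_in, n_processes=None):
    c = Client()                                   # connect to a running ipcluster
    dview = c[:] if n_processes is None else c[:n_processes]
    try:
        results = dview.map_sync(func, args_in)    # blocking map over the engines
    finally:
        dview.results.clear()                      # drop results cached on the view
        c.purge_results('all')                     # and on the hub
        c.purge_everything()
        c.close()
    return results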
def run_CNMF_patches(file_name, shape, options, rf=16, stride = 4, n_processes=2, backend='single_thread',memory_fact=1):
    """Function that runs CNMF in patches, either in parallel or sequentiually, and return the result for each. It requires that ipyparallel is running
        
    Parameters
    ----------        
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie        
        
    shape: tuple of three elements
        dimensions of the original movie across y, x, and time 
    
    options:
        dictionary containing all the parameters for the various algorithms
    
    rf: int 
        half-size of the square patch in pixels
    
    stride: int
        amount of overlap between patches
        
    backend: string
        'ipyparallel' or 'single_thread'
    
    n_processes: int
        number of cores to be used (should be less than the number of cores started with ipyparallel)
        
    memory_fact: double
        unitless number controlling how much memory should be used. It represents the fraction of the patch processed in a single thread. You will need to try different values to find one that works
    
    
    Returns
    -------
    A_tot: matrix containing all the components from all the patches
    
    C_tot: matrix containing the calcium traces corresponding to A_tot
    
    sn_tot: per pixel noise estimate
    
    optional_outputs: set of outputs related to the result of the CNMF algorithm on each patch
    """
    (d1,d2,T)=shape
    d=d1*d2
    K=options['init_params']['K']
    
    options['preprocess_params']['backend']='single_thread' 
    options['preprocess_params']['n_pixels_per_process']=np.int((rf*rf)/memory_fact)
    options['spatial_params']['n_pixels_per_process']=np.int((rf*rf)/memory_fact)
    options['temporal_params']['n_pixels_per_process']=np.int((rf*rf)/memory_fact)
    options['spatial_params']['backend']='single_thread'
    options['temporal_params']['backend']='single_thread'

    
    idx_flat,idx_2d=extract_patch_coordinates(d1, d2, rf=rf, stride = stride)
#    import pdb 
#    pdb.set_trace()
    args_in=[]    
    for id_f,id_2d in zip(idx_flat[:],idx_2d[:]):        
        args_in.append((file_name, id_f,id_2d[0].shape, options))

    print len(idx_flat)

    st=time.time()        
    
    if backend == 'ipyparallel':

        try:

            c = Client()   
            dview=c[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)        
            dview.results.clear()   
            c.purge_results('all')
            c.purge_everything()
            c.close()         
        except:
            print('Something went wrong')  
            raise
        finally:
            print('You may think that it went well but reality is harsh')
                    

    elif backend == 'single_thread':

        file_res = map(cnmf_patches, args_in)                         

    else:
        raise Exception('Backend unknown')
            
      
    print time.time()-st
    
    
    # extract the values from the output of mapped computation
    num_patches=len(file_res)
    
    A_tot=scipy.sparse.csc_matrix((d,K*num_patches))
    B_tot=scipy.sparse.csc_matrix((d,num_patches))
    C_tot=np.zeros((K*num_patches,T))
    F_tot=np.zeros((num_patches,T))
    mask=np.zeros(d)
    sn_tot=np.zeros((d1*d2))
    b_tot=[]
    f_tot=[]
    bl_tot=[]
    c1_tot=[]
    neurons_sn_tot=[]
    g_tot=[]    
    idx_tot=[];
    shapes_tot=[]    
    id_patch_tot=[]
    
    count=0  
    patch_id=0

    print 'Transforming patches into full matrix'
    
    for idx_,shapes,A,b,C,f,S,bl,c1,neurons_sn,g,sn,_ in file_res:
    
        sn_tot[idx_]=sn
        b_tot.append(b)
        f_tot.append(f)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)
        mask[idx_] += 1
        F_tot[patch_id,:]=f
        B_tot[idx_,patch_id]=b        
        
        for ii in range(np.shape(A)[-1]):            
            new_comp=A.tocsc()[:,ii]/np.sqrt(np.sum(np.array(A.tocsc()[:,ii].todense())**2))
            if new_comp.sum()>0:
                A_tot[idx_,count]=new_comp
                C_tot[count,:]=C[ii,:]   
                id_patch_tot.append(patch_id)
                count+=1
        
        patch_id+=1      

    A_tot=A_tot[:,:count]
    C_tot=C_tot[:count,:]  
    
    optional_outputs=dict()
    optional_outputs['b_tot']=b_tot
    optional_outputs['f_tot']=f_tot
    optional_outputs['bl_tot']=bl_tot
    optional_outputs['c1_tot']=c1_tot
    optional_outputs['neurons_sn_tot']=neurons_sn_tot
    optional_outputs['g_tot']=g_tot
    optional_outputs['idx_tot']=idx_tot
    optional_outputs['shapes_tot']=shapes_tot
    optional_outputs['id_patch_tot']= id_patch_tot
    optional_outputs['B'] = B_tot
    optional_outputs['F'] = F_tot
    optional_outputs['mask'] = mask
    
    Im = scipy.sparse.csr_matrix((1./mask,(np.arange(d),np.arange(d))))
    Bm = Im.dot(B_tot)
    A_tot = Im.dot(A_tot)
    f = np.mean(F_tot,axis=0)

    for iter in range(10):
        b = Bm.dot(F_tot.dot(f))/np.sum(f**2)  
        f = np.dot((Bm.T.dot(b)).T,F_tot)/np.sum(b**2)

    
    return A_tot,C_tot,b,f,sn_tot, optional_outputs
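
A hedged call sketch for run_CNMF_patches; the options keys ('init_params', 'preprocess_params', 'spatial_params', 'temporal_params') are taken from the accesses made above, but their full contents come from the rest of the CNMF code and are only hinted at here, and the file name and dimensions are placeholders:

# hypothetical call sketch, not part of the original source
d1, d2, T = 512, 512, 3000                       # placeholder movie dimensions
options = {
    'init_params':       {'K': 30},              # components per patch (key read above)
    'preprocess_params': {},                     # normally filled by the CNMF option helpers
    'spatial_params':    {},
    'temporal_params':   {},
}
A_tot, C_tot, b, f, sn_tot, extra = run_CNMF_patches(
    'movie_pixels_x_time.npy', (d1, d2, T), options,
    rf=16, stride=4, n_processes=2, backend='ipyparallel')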
def update_temporal_components_parallel(Y, A, b, Cin, fin, bl = None,  c1 = None, g = None,  sn = None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread',memory_efficient=False, **kwargs):
    """Update temporal components and background given spatial components using a block coordinate descent approach.


    Parameters
    -----------    

    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix (csc format)
        matrix of spatial components (d x K)
    b: ndarray (dx1)
        current estimate of background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)   
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    g:  np.ndarray
        Global time constant (not used)
    bl: np.ndarray
       baseline for fluorescence trace for each column in A
    c1: np.ndarray
       initial concentration for each column in A
    g:  np.ndarray       
       discrete time constant for each column in A
    sn: np.ndarray
       noise level for each column in A       
    ITER: positive integer
        Maximum number of block coordinate descent loops. 
    method_foopsi: string
        Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment.               
    n_processes: int
        number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster.
    backend: 'str'
        single_thread no parallelization
        ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type 
        ipcluster start -n 6, where 6 is the number of processes). 
    memory_efficient: Bool
        whether or not to optimize for memory usage (longer running times). Necessary with very large datasets
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are      
    p: int
        order of the autoregression model
    method: [optional] string
        solution method for basis projection pursuit cvx or spgl1 or debug for fast but possibly imprecise temporal components    
  
    Returns
    --------
    
    C:     np.matrix
            matrix of temporal components (K x T)
    f:     np.array
            vector of temporal background (length T) 
    Y_res: np.ndarray
            matrix with current residual (d x T)
    S:     np.ndarray            
            matrix of merged deconvolved activity (spikes) (K x T)
    bl:  float  
            same as input    
    c1:  float
            same as input    
    g:   float
            same as input    
    sn:  float
            same as input 
    
    """
    if not kwargs.has_key('p') or kwargs['p'] is None:
        raise Exception("You have to provide a value for p")

    d,T = np.shape(Y);
    
    
    
    nr = np.shape(A)[-1]
    
    
    if  bl is None:
        bl=np.repeat(None,nr)
        
    if  c1 is None:
        c1=np.repeat(None,nr)

    if  g is None:
        g=np.repeat(None,nr)

    if  sn is None:
        sn=np.repeat(None,nr)                        
    
    A = scipy.sparse.hstack((A,coo_matrix(b)))
    S = np.zeros(np.shape(Cin));
    Cin =  np.vstack((Cin,fin));
    C = Cin;
    #%
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()),axis=0)))
    
    
    Sp = np.zeros((nr,T))
    #YrA = Y.T*A - Cin.T*(A.T*A);
#    Y=np.matrix(Y)
#    C=np.matrix(C)
#    Cin=np.matrix(Cin)
#    YrA2 = Y.T*A - Cin.T*(A.T*A);

    Cin=coo_matrix(Cin)
    YrA = (A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A))
    
    
    if backend == 'ipyparallel':
        try: # if server is not running and raise exception if not installed or not started        
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise
    
        if len(c) <  n_processes:
            print len(c)
            raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value")            
        
        dview=c[:n_processes] # use the number of processes
    
    Cin=np.array(Cin.todense())    
    for iter in range(ITER):
        O,lo = update_order(A.tocsc()[:,:nr])
        P_=[];
        for count,jo_ in enumerate(O):
            jo=np.array(list(jo_))           
            Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T
            Ctemp = np.zeros((np.size(jo),T))
            Stemp = np.zeros((np.size(jo),T))
            btemp = np.zeros((np.size(jo),1))
            sntemp = btemp.copy()
            c1temp = btemp.copy()
            gtemp = np.zeros((np.size(jo),kwargs['p']));
            nT = nA[jo]            
            
            
#            args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))]
            args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))]
            
            if backend == 'ipyparallel':                    
                
                results = dview.map_sync(constrained_foopsi_parallel,args_in)        

            elif backend == 'single_thread':
                
                results = map(constrained_foopsi_parallel,args_in)            
                
            else:
                
                raise Exception('Backend not defined. Use either single_thread or ipyparallel')
                
            for chunk in results:
                #pars=dict(kwargs)
                C_,Sp_,Ytemp_,cb_,c1_,sn_,gn_,jj_=chunk                    
                Ctemp[jj_,:] = C_[None,:]
                                
                Stemp[jj_,:] = Sp_               
                Ytemp[:,jj_] = Ytemp_[:,None]            
                btemp[jj_] = cb_
                c1temp[jj_] = c1_
                sntemp[jj_] = sn_   
                gtemp[jj_,:] = gn_.T  
                   
                bl[jo[jj_]] = cb_
                c1[jo[jj_]] = c1_
                sn[jo[jj_]] = sn_
                g[jo[jj_]]  = gtemp[jj_,:]
                             
                
                #pars['b'] = cb_
#                pars['c1'] = c1_                 
#                pars['neuron_sn'] = sn_
#                pars['gn'] = gtemp[jj_,np.abs(gtemp[jj,:])>0] 
#                
##                for jj = 1:length(O{jo})
##                    P.gn(O{jo}(jj)) = {gtemp(jj,abs(gtemp(jj,:))>0)'};
##                end
#                pars['neuron_id'] = jo[jj_]
#                P_.append(pars)
            
            YrA[:,jo] = Ytemp
            C[jo,:] = Ctemp            
            S[jo,:] = Stemp
            
#            if (np.sum(lo[:jo])+1)%1 == 0:
            print str(np.sum(lo[:count+1])) + ' out of total ' + str(nr) + ' temporal components updated \n'
        
        ii=nr        
        YrA[:,ii] = YrA[:,ii] + nA[ii]*np.atleast_2d(Cin[ii,:]).T
        cc = np.maximum(YrA[:,ii]/nA[ii],0)
        C[ii,:] = cc[:].T
        YrA[:,ii] = YrA[:,ii] - nA[ii]*np.atleast_2d(C[ii,:]).T 
        
        if backend == 'ipyparallel':       
            dview.results.clear()   
            c.purge_results('all')
            c.purge_everything()

        if scipy.linalg.norm(Cin - C,'fro')/scipy.linalg.norm(C,'fro') <= 1e-3:
            # stop if the overall temporal component does not change by much
            print "stopping: overall temporal component not changing significantly"
            break
        else:
            Cin = C
    
    Y_res = Y - A*C # this includes the baseline term
    
    f = C[nr:,:]
    C = C[:nr,:]
        
    P_ = sorted(P_, key=lambda k: k['neuron_id']) 
    if backend == 'ipyparallel':      
        c.close()
    
    return C,f,Y_res,S,bl,c1,sn,g
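
Note that the function above requires the autoregressive order p to be passed through **kwargs (it raises otherwise). A hedged call sketch, with Y, A, b, Cin and fin assumed to come from the preceding initialization and spatial-update steps:

# hypothetical call sketch, not part of the original source
C, f, Y_res, S, bl, c1, sn, g = update_temporal_components_parallel(
    Y, A, b, Cin, fin,
    ITER=2, p=2,                     # p is forwarded to constrained_foopsi via **kwargs
    backend='single_thread')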
Example #6
def specific_index(matrix, shape, indexes_file, indexes_name, method = 1):
    import scipy.io as sio
    
    
    workspace = sio.loadmat(indexes_file)
    shape = int(shape)
    covmat = np.array(matrix)
    covmat = np.reshape(covmat,(shape,shape))
    
    det_method = """def det():
        N = len(indexes_list)
        dets = numpy.zeros(N,dtype=numpy.float32)
        ent = numpy.zeros(N,dtype=numpy.float32)
        dets[:] = 0 #numpy.nan 
        ent[:] = 0 #numpy.nan
        
        
        if len(indexes_list) > 0:
            idx_mat = indexes_list
        else:    
            return {'dets':[],'ents':[],'idx':[]}
        matrices = []
        idxs = []
        i=0
        for ind in idx_mat:
            idx = ind[0][0] - 1
            matrix = covmat[idx][:,idx]
            dets[i] = numpy.linalg.det(matrix)
            ent[i] = entropy(k,dets[i])
            matrices.append(matrix)
            idxs.append(ind)
            i += 1
        return {'dets':dets,'ents':ent,'idx':idxs}"""

    if method == 1:
        entropy_method = """def entropy(x,y):
        return 0.5*numpy.log((2*numpy.pi * numpy.exp(1))**(x)*y)
        """
    elif method == 2:
        entropy_method = """def entropy(x,y):
        return 0.5*numpy.log(numpy.abs((2*numpy.pi * numpy.exp(1))**(x)*y))
        """
    elif method == 3:
        entropy_method = """def entropy(x,y):
        return #0.5*numpy.log(numpy.abs((2*numpy.pi * numpy.exp(1))**(x)*y))
        """
    from ipyparallel import Client
    rc = Client(profile='brincolab-cluster')
    rc.purge_everything()  # clear the session
    
    dview = rc[:] # use all engines
    #dview.use_pickle()
    print(len(dview))
    #dview.block = True  # block mode
    dview['covmat'] = covmat  # distribute the covariance matrix to every engine
    dview['k'] = shape - 1    # assumption: det() calls entropy(k, ...); k = N-1 as in get_all_determinants below
    dview.execute(det_method)
    dview.execute(entropy_method)
    
    indexes_list = workspace[indexes_name]
    dview.scatter('indexes_list',indexes_list)
    dview.execute('res = det()', block=True)
    responses = dview.gather('res', block=True)
    
    dets = np.array([], dtype=np.float32)
    ents = np.array([], dtype=np.float32)
    idxs = []
    for el in responses:
        #print(el.keys())
        dets = np.append(dets,el['dets'])
        ents = np.append(ents,el['ents'])    
        for subel in el['idx']:
            idxs.append(subel)
            
    dets = dets[~np.isnan(dets)]
    ents = ents[~np.isnan(ents)]
    print(len(dets),len(ents),len(idxs))
    print()
    
    rc.purge_everything()  # clear the session
    
    return (dets,ents,idxs)

    #ar = dview.apply_async(det)
    #ar.wait()
    #responses = ar.get()
    
    return 0
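
Example #6 relies on ipyparallel's push / execute / scatter / gather cycle; a self-contained sketch of that cycle (with a toy function in place of det/entropy) shows the same data flow:

# minimal push / execute / scatter / gather sketch (not part of the original source)
from ipyparallel import Client

rc = Client()                                   # assumes a running ipcluster
dview = rc[:]
dview['offset'] = 10                            # push a variable to every engine
dview.execute("def work(xs): return [x * x + offset for x in xs]", block=True)
dview.scatter('chunk', list(range(100)), block=True)   # split the list across engines
dview.execute('res = work(chunk)', block=True)
results = dview.gather('res', block=True)       # flat list, gathered back in engine order
print(len(results))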
Example #7
def get_all_determinants(matrix, shape, method = 1):
    
    det_method = """def det(N):
        dets = numpy.zeros(N,dtype=numpy.float32)
        ent = numpy.zeros(N,dtype=numpy.float32)
        dets[:] = -1 #numpy.nan 
        ent[:] = -1 #numpy.nan
        
        if len(index_mat) > 0:
            idx_mat = index_mat[0]
        else:    
            return {'dets':[],'ents':[],'idx':[]}
        matrices = []
        idxs = []
        i=0
        for ind in idx_mat:
            idx = list(ind)
            matrix = covmat[idx][:,idx]
            dets[i] = numpy.linalg.det(matrix)
            ent[i] = entropy(k,dets[i])
            matrices.append(matrix)
            idxs.append(ind)
            i += 1
        return {'dets':dets,'ents':ent,'idx':idxs}"""

    if method == 1:
        entropy_method = """def entropy(x,y):
        return 0.5*numpy.log((2*numpy.pi * numpy.exp(1))**(x)*y)
        """
    elif method == 2:
        entropy_method = """def entropy(x,y):
        return 0.5*numpy.log(numpy.abs((2*numpy.pi * numpy.exp(1))**(x)*y))
        """
    ## Function to compute the determinants
    
    shape = int(shape)
    covmat = np.array(matrix)
    covmat = np.reshape(covmat,(shape,shape))
    
    N = covmat.shape[0] ## change how the matrix is read
    k = N-1
    # the already-computed covariance matrix should be provided here
    arr = list(range(0,N)) # array used to generate the masks of the square submatrices

    from ipyparallel import Client
    rc = Client(profile='brincolab-cluster')
    rc.purge_everything()  # clear the session
    
    dview = rc[:] # use all engines
    #dview.use_pickle()
    print(len(dview))
    #dview.block = True  # block mode
    dview['covmat'] = covmat  # distribute the covariance matrix to every engine
    dview['k'] = k # N-1
    dview.execute(det_method)
    dview.execute(entropy_method)
    

    iteraciones = {}
    det_calcular = {}
    indices_scatter = {}
    for i in range(2,N):
        iteraciones[i] = itertools.combinations(arr, i)
        iters = itertools.combinations(arr, i)
        n = int(factorial(N) / factorial(i) / factorial(N-i))
        n_per_process = ceil(int(n)/len(rc.ids) )
        print(i, n, n_per_process)
        det_calcular[i] = (n,n_per_process)
        indexes = []
        for j in range(0,n,n_per_process):
            sl = islice(iters, j , j + n_per_process )
            indexes.append(sl)
        indices_scatter[i] = indexes
    
    with dview.sync_imports():
        import numpy
        #import determinantes
    
    responses = {}
    for i in range(2,N):
        n_dets = det_calcular[i][0]
        n_scatters = det_calcular[i][1]
        print(n_scatters)
        dview.scatter('index_mat', indices_scatter[i], block=True)
        dview.execute('res = det({})'.format(n_scatters), block=True)
        responses[i] = dview.gather('res', block=True)
        
    dets = np.array([], dtype=np.float32)
    ents = np.array([], dtype=np.float32)
    idxs = []
    #print(idxs)
    for i in tqdm(range(2,N)):
        for el in responses[i]:
            #print(el.keys())
            if -1 in el['dets']:
                mask = np.array(el['dets']) == -1
                #print(mask)
                dets = np.append(dets,el['dets'][~mask])
                ents = np.append(ents,el['ents'][~mask])
            else:
                dets = np.append(dets,el['dets'])
                ents = np.append(ents,el['ents'])
            for subel in el['idx']:
                idxs.append(subel)
        #for j in responses[i]['dets']:
        #    print(j)
        
    dets = dets[~numpy.isnan(dets)]
    ents = ents[~numpy.isnan(ents)]
    
    
    print(len(dets),len(ents),len(idxs))
    mask = dets == -1
    print(len(dets[mask]))
    
    rc.purge_everything()  # clear the session
    
    return (dets,ents,idxs)
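
Example #7 splits the stream of index combinations into per-engine chunks before scattering it; a small standalone sketch of equivalent chunking, materializing successive fixed-size blocks from the combinations iterator (sizes are toy values):

# iterator-chunking sketch (not part of the original source)
import itertools
from itertools import islice
from math import ceil, factorial

N, i, n_engines = 8, 3, 4
n = factorial(N) // (factorial(i) * factorial(N - i))   # number of i-element subsets of range(N)
n_per_process = ceil(n / n_engines)
combos = itertools.combinations(range(N), i)
chunks = []
while True:
    block = list(islice(combos, n_per_process))         # take the next block of combinations
    if not block:
        break
    chunks.append(block)
print(n, [len(c) for c in chunks])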
Example #8
# NOTE: this is a fragment of a larger simulation script; `trends`, `percentiles`, `T`,
# `m`, `EX_NUM`, `EX_SIZE`, `wrapper`, `rc` and `lview` are defined earlier in that script.
import datetime

from numpy import array, nan, percentile, savez, zeros
from numpy.random import RandomState

rng = RandomState(0)
seeds = rng.random_integers(0, 2**31 - 2, size=EX_NUM)

for tr in trends:
    results = zeros((len(percentiles), len(T), EX_NUM)) * nan
    filename = "adf_z_" + tr + ".npz"

    for i in range(EX_NUM):
        print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
        # Non parallel version
        # args = (T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
        # out = [wrapper(a, b, c, d) for a, b, c, d in args]
        now = datetime.datetime.now()
        out = lview.map_sync(wrapper, T, [tr] * m, [EX_SIZE] * m,
                             [seeds[i]] * m)
        # Prevent unnecessary results from accumulating
        lview.purge_results("all")
        rc.purge_everything()
        print(datetime.datetime.now() - now)
        quantiles = [percentile(x, percentiles) for x in out]
        results[:, :, i] = array(quantiles).T

        if i % 50 == 0:
            savez(filename,
                  trend=tr,
                  results=results,
                  percentiles=percentiles,
                  T=T)

    savez(filename, trend=tr, results=results, percentiles=percentiles, T=T)
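
A sketch of the ipyparallel setup this fragment assumes (the original script defines it elsewhere); whether lview is a direct or a load-balanced view, and the contents of T, are assumptions here:

# assumed cluster setup for the fragment above (not part of the original source)
from ipyparallel import Client

rc = Client()                          # connect to a running ipcluster
lview = rc.load_balanced_view()        # 'lview' used by map_sync above (could also be rc[:])
T = (25, 50, 100, 250, 500, 1000)      # placeholder sample sizes
m = len(T)                             # one simulation task per sample size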
Example #9
def update_spatial_components_parallel(Y,
                                       C,
                                       f,
                                       A_in,
                                       sn=None,
                                       d1=None,
                                       d2=None,
                                       min_size=3,
                                       max_size=8,
                                       dist=3,
                                       method='ellipse',
                                       expandCore=None,
                                       backend='single_thread',
                                       n_processes=4,
                                       n_pixels_per_process=128,
                                       memory_efficient=False):
    """update spatial footprints and background     
    through Basis Pursuit Denoising

    for each pixel i solve the problem 
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to 
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);
    
    for each pixel the search is limited to a few spatial components
    
    Parameters
    ----------   
    Y: np.ndarray (2D)
        movie, raw data in 2D (pixels x time).
    C: np.ndarray
        calcium activity of each neuron. 
    f: np.ndarray
        temporal profile  of background activity.
    A_in: np.ndarray
        current estimate of the spatial footprint of each neuron.    
        
    d1: [optional] int
        x movie dimension
        
    d2: [optional] int
        y movie dimension

    min_size: [optional] int
                
    max_size: [optional] int
                
    dist: [optional] int
        
        
    sn: [optional] float
        noise associated with each pixel if known
        
    n_processes: [optional] int
        number of threads to use when the backend is multiprocessing, threading, or ipyparallel
        
    backend [optional] str
        'multiprocessing', 'threading', 'ipyparallel', 'single_thread' 
        single_thread: no parallelization. It should be used in most cases.         
        multiprocessing or threading: use the corresponding python threading package. It has known issues on macOS and should not be used in most situations.
        ipyparallel: starts an ipython cluster and then sends jobs to each engine 
        
    
    n_pixels_per_process: [optional] int
        number of pixels to be processed by each thread 
    
    memory_efficient [bool]
        whether or not to reduce memory usage (at the expense of increased computational time)
            
    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'
        
    expandCore: [optional]  scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion


    Returns
    --------    
    A: np.ndarray        
         new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray        
         temporal components (updated only when spatial components are completely removed)             
       
    """

    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1),
                                       2).astype(int)

    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')

    Y = np.atleast_2d(Y)
    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.'
        )

    nr, _ = np.shape(C)  # number of neurons

    IND = determine_search_location(A_in,
                                    d1,
                                    d2,
                                    method=method,
                                    min_size=min_size,
                                    max_size=max_size,
                                    dist=dist,
                                    expandCore=expandCore)
    print " find search location"

    ind2_ = [
        np.hstack(
            (np.where(iid_)[0], nr +
             np.arange(f.shape[0]))) if np.size(np.where(iid_)[0]) > 0 else []
        for iid_ in IND
    ]

    folder = tempfile.mkdtemp()

    if backend == 'multiprocessing' or backend == 'threading':

        A_name = os.path.join(folder, 'A_temp')

        # Pre-allocate a writeable shared memory map as a container for the
        # results of the parallel computation
        print "Create Matrix for dumping data from matrix A and C for parallel computation...."
        A_ = np.memmap(A_name,
                       dtype=A_in.dtype,
                       shape=(d, nr + np.size(f, 0)),
                       mode='w+')

        pixels_name = os.path.join(folder, 'pixels')

        C_name = os.path.join(folder, 'C_temp')

        # Dump the input data to disk to free the memory
        dump(Y, pixels_name)
        dump(Cf, C_name)

        # use memory mapped versions of C and Y
        Y = load(pixels_name, mmap_mode='r')
        Cf = load(C_name, mmap_mode='r')

        pixel_groups = [
            range(i, i + n_pixels_per_process)
            for i in range(0, Y.shape[0] - n_pixels_per_process +
                           1, n_pixels_per_process)
        ]

        # Fork the worker processes to perform computation concurrently
        print "start parallel pool..."
        sys.stdout.flush()
        Parallel(n_jobs=n_processes,
                 backend=backend,
                 verbose=100,
                 max_nbytes=None)(delayed(lars_regression_noise_parallel)(
                     Y, Cf, A_, sn, i, ind2_) for i in pixel_groups)

        # if n_pixels_per_process is not a multiple of Y.shape[0] run on remaining pixels
        pixels_remaining = Y.shape[0] % n_pixels_per_process
        if pixels_remaining > 0:
            print "Running deconvolution for remaining pixels:" + str(
                pixels_remaining)
            lars_regression_noise_parallel(Y,
                                           Cf,
                                           A_,
                                           sn,
                                           range(Y.shape[0] - pixels_remaining,
                                                 Y.shape[0]),
                                           ind2_,
                                           positive=1)
        A_ = np.array(A_)

    elif backend == 'ipyparallel':  # use the ipyparallel package, you need to start a cluster server (ipcluster command) in order to use it

        C_name = os.path.join(folder, 'C_temp.npy')
        np.save(C_name, Cf)

        if type(
                Y
        ) is np.core.memmap:  # if input file is already memory mapped then find the filename
            Y_name = Y.filename
        else:  # if not create a memory mapped version (necessary for parallelization)
            Y_name = os.path.join(folder, 'Y_temp.npy')
            np.save(Y_name, Y)
            Y = np.load(Y_name, mmap_mode='r')

        # create arguments to be passed to the function. Here we are grouping bunch of pixels to be processed by each thread
        pixel_groups = [(Y_name, C_name, sn, ind2_,
                         range(i, i + n_pixels_per_process))
                        for i in range(0, d1 * d2 - n_pixels_per_process +
                                       1, n_pixels_per_process)]

        A_ = np.zeros((d, nr + np.size(f, 0)))
        try:  # if server is not running and raise exception if not installed or not started
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise

        if len(c) < n_processes:
            print len(c)
            raise Exception(
                "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
            )

        dview = c[:n_processes]  # use the number of processes
        #serial_result = map(lars_regression_noise_ipyparallel, pixel_groups)
        parallel_result = dview.map_sync(lars_regression_noise_ipyparallel,
                                         pixel_groups)
        for chunk in parallel_result:
            for pars in chunk:
                px, idxs_, a = pars
                A_[px, idxs_] = a
        #clean up
        dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    elif backend == 'single_thread':

        Cf_ = [Cf[idx_, :] for idx_ in ind2_]

        #% LARS regression
        A_ = np.hstack((np.zeros((d, nr)), np.zeros((d, np.size(f, 0)))))

        for c, y, s, id2_, px in zip(Cf_, Y, sn, ind2_, range(d)):
            if px % 1000 == 0:
                print px
            if np.size(c) > 0:
                _, _, a, _, _ = lars_regression_noise(y, np.array(c.T), 1,
                                                      sn[px]**2 * T)
                if np.isscalar(a):
                    A_[px, id2_] = a
                else:
                    A_[px, id2_] = a.T

    else:
        raise Exception(
            'Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel'
        )

    #%
    print 'Updated Spatial Components'
    A_ = threshold_components(A_, d1, d2)
    print "threshold"
    ff = np.where(np.sum(A_, axis=0) == 0)
    # remove empty components
    if np.size(ff) > 0:
        ff = ff[0]
        warn('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_, list(ff), 1)
        C = np.delete(C, list(ff), 0)

    A_ = A_[:, :nr]
    A_ = coo_matrix(A_)

    if memory_efficient:
        print "Using memory efficient computation (slow but memory preserving)"
        A__ = coo_matrix(A_, dtype=np.float32)
        C__ = coo_matrix(C[:nr, :], dtype=np.float32)
        Y_res_name = os.path.join(folder, 'Y_res_temp.npy')
        Y_res = np.memmap(Y_res_name,
                          dtype=np.float32,
                          mode='w+',
                          shape=Y.shape)
        Y_res = np.memmap(Y_res_name,
                          dtype=np.float32,
                          mode='r+',
                          shape=Y.shape)
        print "computing residuals"
        Y_res[:] = -A__.dot(C__).todense()[:]
        Y_res[:] += Y
    else:
        print "Using memory trade-off computation (good use of memory if input is memmaped)"
        Y_res = Y - A_.dot(coo_matrix(C[:nr, :]))

    print "Computing A_bas"
    A_bas = np.fmax(np.dot(Y_res, f.T) / scipy.linalg.norm(f)**2,
                    0)  # update baseline based on residual
    Y_res[:] = 1
    b = A_bas

    print("--- %s seconds ---" % (time.time() - start_time))

    try:  #clean up
        # remove temporary file created
        print "Remove temporary file created"
        shutil.rmtree(folder)

    except:

        raise Exception("Failed to delete: " + folder)

    return A_, b, C
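
The ipyparallel branch above skips the temporary np.save when Y is already a numpy memmap; a sketch of preparing the movie that way before calling the function (file name, dtype and shape are placeholders):

# sketch: pass Y as a memory-mapped array to avoid the temporary copy (not part of the original source)
import numpy as np

d, T = 512 * 512, 3000                              # placeholder pixels x frames
Y = np.memmap('Y_movie.dat', dtype=np.float32,      # hypothetical movie file on disk
              mode='w+', shape=(d, T))
# A_out, b_out, C_out = update_spatial_components_parallel(
#     Y, C, f, A_in, sn=sn, d1=512, d2=512,
#     backend='ipyparallel', n_processes=4, n_pixels_per_process=4096)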
def update_temporal_components(Y, A, b, Cin, fin, bl = None,  c1 = None, g = None,  sn = None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread',memory_efficient=False, debug=False, **kwargs):
    """Update temporal components and background given spatial components using a block coordinate descent approach.
    
    Parameters
    -----------    

    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix (csc format)
        matrix of spatial components (d x K)
    b: ndarray (dx1)
        current estimate of background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)   
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    g:  np.ndarray
        Global time constant (not used)
    bl: np.ndarray
       baseline for fluorescence trace for each column in A
    c1: np.ndarray
       initial concentration for each column in A
    g:  np.ndarray       
       discrete time constant for each column in A
    sn: np.ndarray
       noise level for each column in A       
    ITER: positive integer
        Maximum number of block coordinate descent loops. 
    method_foopsi: string
        Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment.               
    n_processes: int
        number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster.
    backend: 'str'
        single_thread no parallelization
        ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type 
        ipcluster start -n 6, where 6 is the number of processes). 
    memory_efficient: Bool
        whether or not to optimize for memory usage (longer running times). Necessary with very large datasets
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are      
    p: int
        order of the autoregression model
    method: [optional] string
        solution method for constrained foopsi. Choices are
            'cvx':      using cvxopt and picos (slow especially without the MOSEK solver)
            'cvxpy':    using cvxopt and cvxpy with the ECOS solver (faster, default)
            'spgl1':    using the spgl1 package
            'debug':    using spgl1 without spike non-negativity constraints (just for debugging purposes)
    
    solvers: list string
            primary and secondary (if problem unfeasible for approx solution) solvers to be used with cvxpy, default is ['ECOS','SCS']
            
    Note
    --------

    The temporal components are updated in parallel by default by forming of sequence of vertex covers.  
    
    Returns
    --------
    
    C:   np.ndarray
            matrix of temporal components (K x T)
    f:   np.array
            vector of temporal background (length T) 
    S:   np.ndarray            
            matrix of merged deconvolved activity (spikes) (K x T)
    bl:  float  
            same as input    
    c1:  float
            same as input    
    g:   float
            same as input    
    sn:  float
            same as input 
    YrA: np.ndarray
            matrix of spatial component filtered raw data, after all contributions have been removed.            
            YrA corresponds to the residual trace for each component and is used for faster plotting (K x T)
    """
    if not kwargs.has_key('p') or kwargs['p'] is None:
        raise Exception("You have to provide a value for p")

    d,T = np.shape(Y);    
    nr = np.shape(A)[-1]
    
    
    if  bl is None:
        bl=np.repeat(None,nr)
        
    if  c1 is None:
        c1=np.repeat(None,nr)

    if  g is None:
        g=np.repeat(None,nr)

    if  sn is None:
        sn=np.repeat(None,nr)                        
    
    A = scipy.sparse.hstack((A,coo_matrix(b)))
    S = np.zeros(np.shape(Cin));
    Cin =  np.vstack((Cin,fin));
    C = Cin;
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()),axis=0)))
    #import pdb
    #pdb.set_trace()
    Cin=coo_matrix(Cin)
    #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))
    YA = (A.T.dot(Y).T)*spdiags(1./nA,0,nr+1,nr+1)
    AA = ((A.T.dot(A))*spdiags(1./nA,0,nr+1,nr+1)).tocsr()
    YrA = YA - Cin.T.dot(AA)
    #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))*spdiags(1./nA,0,nr+1,nr+1)
    
    if backend == 'ipyparallel':
        try: # if server is not running and raise exception if not installed or not started        
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise
    
        if len(c) <  n_processes:
            print len(c)
            raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value")            
        
        dview=c[:n_processes] # use the number of processes
    
    Cin=np.array(Cin.todense())    
    for iter in range(ITER):
        O,lo = update_order(A.tocsc()[:,:nr])
        P_=[];
        for count,jo_ in enumerate(O):
            jo=np.array(list(jo_))           
            #Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T
            Ytemp = YrA[:,jo.flatten()] + Cin[jo,:].T
            Ctemp = np.zeros((np.size(jo),T))
            Stemp = np.zeros((np.size(jo),T))
            btemp = np.zeros((np.size(jo),1))
            sntemp = btemp.copy()
            c1temp = btemp.copy()
            gtemp = np.zeros((np.size(jo),kwargs['p']));
            nT = nA[jo]            
                        
#            args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))]
            args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))]
#            import pdb
#            pdb.set_trace()
            if backend == 'ipyparallel':                    
                #
                if debug:                
                    results = dview.map_async(constrained_foopsi_parallel,args_in)  
                    results.get()
                    for outp in results.stdout:   
                        print outp[:-1]  
                        sys.stdout.flush()                                                 
                    for outp in results.stderr:   
                        print outp[:-1]  
                        sys.stderr.flush()            
                    
                else:
                    
                    results = dview.map_sync(constrained_foopsi_parallel,args_in)
                
            elif backend == 'single_thread':
                
                results = map(constrained_foopsi_parallel,args_in)            
                
            else:
                
                raise Exception('Backend not defined. Use either single_thread or ipyparallel')
                
            for chunk in results:
                pars=dict()
                C_,Sp_,Ytemp_,cb_,c1_,sn_,gn_,jj_=chunk                    
                Ctemp[jj_,:] = C_[None,:]
                                
                Stemp[jj_,:] = Sp_               
                Ytemp[:,jj_] = Ytemp_[:,None]            
                btemp[jj_] = cb_
                c1temp[jj_] = c1_
                sntemp[jj_] = sn_   
                gtemp[jj_,:] = gn_.T  
                   
                bl[jo[jj_]] = cb_
                c1[jo[jj_]] = c1_
                sn[jo[jj_]] = sn_
                g[jo[jj_]]  = gn_.T if kwargs['p'] > 0 else [] #gtemp[jj,:]
                                             
                pars['b'] = cb_
                pars['c1'] = c1_                 
                pars['neuron_sn'] = sn_
                pars['gn'] = gtemp[jj_,np.abs(gtemp[jj_,:])>0] 
                pars['neuron_id'] = jo[jj_]
                P_.append(pars)
            
            YrA -= (Ctemp-C[jo,:]).T*AA[jo,:]
            #YrA[:,jo] = Ytemp
            C[jo,:] = Ctemp.copy()            
            S[jo,:] = Stemp
            
#            if (np.sum(lo[:jo])+1)%1 == 0:
            print str(np.sum(lo[:count+1])) + ' out of total ' + str(nr) + ' temporal components updated'
        
        ii=nr        

        #YrA[:,ii] = YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T
        #cc = np.maximum(YrA[:,ii],0)        
        cc = np.maximum(YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T,0)
        YrA -= (cc-np.atleast_2d(Cin[ii,:]).T)*AA[ii,:]      
        C[ii,:] = cc.T
        #YrA = YA - C.T.dot(AA)
        #YrA[:,ii] = YrA[:,ii] - np.atleast_2d(C[ii,:]).T                
        
        if backend == 'ipyparallel':       
            dview.results.clear()   
            c.purge_results('all')
            c.purge_everything()

        if scipy.linalg.norm(Cin - C,'fro')/scipy.linalg.norm(C,'fro') <= 1e-3:
            # stop if the overall temporal component does not change by much
            print "stopping: overall temporal component not changing significantly"
            break
        else:
            Cin = C
        
    f = C[nr:,:]
    C = C[:nr,:]
    YrA = np.array(YrA[:,:nr]).T    
    P_ = sorted(P_, key=lambda k: k['neuron_id']) 
    if backend == 'ipyparallel':      
        c.close()
    
    return C,f,S,bl,c1,sn,g,YrA #,P_
def run_CNMF_patches(file_name,
                     shape,
                     options,
                     rf=16,
                     stride=4,
                     n_processes=2,
                     backend='single_thread',
                     memory_fact=1):
    """Function that runs CNMF in patches, either in parallel or sequentiually, and return the result for each. It requires that ipyparallel is running
        
    Parameters
    ----------        
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie        
        
    shape: tuple of three elements
        dimensions of the original movie across y, x, and time 
    
    options:
        dictionary containing all the parameters for the various algorithms
    
    rf: int 
        half-size of the square patch in pixels
    
    stride: int
        amount of overlap between patches
        
    backend: string
        'ipyparallel' or 'single_thread'
    
    n_processes: int
        number of cores to be used (should be less than the number of cores started with ipyparallel)
        
    memory_fact: double
        unitless number controlling how much memory should be used. It represents the fraction of the patch processed in a single thread. You will need to try different values to find one that works
    
    
    Returns
    -------
    A_tot: matrix containing all the components from all the patches
    
    C_tot: matrix containing the calcium traces corresponding to A_tot
    
    sn_tot: per pixel noise estimate
    
    optional_outputs: set of outputs related to the result of the CNMF algorithm on each patch
    """
    (d1, d2, T) = shape
    d = d1 * d2
    K = options['init_params']['K']

    options['preprocess_params']['backend'] = 'single_thread'
    options['preprocess_params']['n_pixels_per_process'] = np.int(
        (rf * rf) / memory_fact)
    options['spatial_params']['n_pixels_per_process'] = np.int(
        (rf * rf) / memory_fact)
    options['temporal_params']['n_pixels_per_process'] = np.int(
        (rf * rf) / memory_fact)
    options['spatial_params']['backend'] = 'single_thread'
    options['temporal_params']['backend'] = 'single_thread'

    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)
    #    import pdb
    #    pdb.set_trace()
    args_in = []
    for id_f, id_2d in zip(idx_flat[:], idx_2d[:]):
        args_in.append((file_name, id_f, id_2d[0].shape, options))

    print len(idx_flat)

    st = time.time()

    if backend == 'ipyparallel':

        try:

            c = Client()
            dview = c[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()
            c.close()
        except:
            print('Something went wrong')
            raise
        finally:
            print('You may think that it went well but reality is harsh')

    elif backend == 'single_thread':

        file_res = map(cnmf_patches, args_in)

    else:
        raise Exception('Backend unknown')

    print time.time() - st

    # extract the values from the output of mapped computation
    num_patches = len(file_res)

    A_tot = scipy.sparse.csc_matrix((d, K * num_patches))
    B_tot = scipy.sparse.csc_matrix((d, num_patches))
    C_tot = np.zeros((K * num_patches, T))
    F_tot = np.zeros((num_patches, T))
    mask = np.zeros(d)
    sn_tot = np.zeros((d1 * d2))
    b_tot = []
    f_tot = []
    bl_tot = []
    c1_tot = []
    neurons_sn_tot = []
    g_tot = []
    idx_tot = []
    shapes_tot = []
    id_patch_tot = []

    count = 0
    patch_id = 0

    print 'Transforming patches into full matrix'

    for idx_, shapes, A, b, C, f, S, bl, c1, neurons_sn, g, sn, _ in file_res:

        sn_tot[idx_] = sn
        b_tot.append(b)
        f_tot.append(f)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)
        mask[idx_] += 1
        F_tot[patch_id, :] = f
        B_tot[idx_, patch_id] = b

        for ii in range(np.shape(A)[-1]):
            new_comp = A.tocsc()[:, ii] / np.sqrt(
                np.sum(np.array(A.tocsc()[:, ii].todense())**2))
            if new_comp.sum() > 0:
                A_tot[idx_, count] = new_comp
                C_tot[count, :] = C[ii, :]
                id_patch_tot.append(patch_id)
                count += 1

        patch_id += 1

    A_tot = A_tot[:, :count]
    C_tot = C_tot[:count, :]

    optional_outputs = dict()
    optional_outputs['b_tot'] = b_tot
    optional_outputs['f_tot'] = f_tot
    optional_outputs['bl_tot'] = bl_tot
    optional_outputs['c1_tot'] = c1_tot
    optional_outputs['neurons_sn_tot'] = neurons_sn_tot
    optional_outputs['g_tot'] = g_tot
    optional_outputs['idx_tot'] = idx_tot
    optional_outputs['shapes_tot'] = shapes_tot
    optional_outputs['id_patch_tot'] = id_patch_tot
    optional_outputs['B'] = B_tot
    optional_outputs['F'] = F_tot
    optional_outputs['mask'] = mask

    Im = scipy.sparse.csr_matrix((1. / mask, (np.arange(d), np.arange(d))))
    Bm = Im.dot(B_tot)
    A_tot = Im.dot(A_tot)
    f = np.mean(F_tot, axis=0)

    for iter in range(10):
        b = Bm.dot(F_tot.dot(f)) / np.sum(f**2)
        f = np.dot((Bm.T.dot(b)).T, F_tot) / np.sum(b**2)

    return A_tot, C_tot, b, f, sn_tot, optional_outputs
Example #12
def update_spatial_components(Y,
                              C,
                              f,
                              A_in,
                              sn=None,
                              d1=None,
                              d2=None,
                              min_size=3,
                              max_size=8,
                              dist=3,
                              method='ellipse',
                              expandCore=None,
                              backend='single_thread',
                              n_processes=4,
                              n_pixels_per_process=128):
    """update spatial footprints and background through Basis Pursuit Denoising

    for each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components

    Parameters
    ----------
    Y: np.ndarray (2D)
        movie, raw data in 2D (pixels x time).
    C: np.ndarray
        calcium activity of each neuron.
    f: np.ndarray
        temporal profile  of background activity.
    A_in: np.ndarray
        current estimate of the spatial footprint of each neuron.

    d1: [optional] int
        x movie dimension

    d2: [optional] int
        y movie dimension

    min_size: [optional] int

    max_size: [optional] int

    dist: [optional] int


    sn: [optional] float
        noise associated with each pixel if known

    n_processes: [optional] int
        number of threads to use when the backend is multiprocessing, threading, or ipyparallel

    backend [optional] str
        'ipyparallel', 'single_thread'
        single_thread: no parallelization. It can be used with small datasets.
        ipyparallel: uses ipython clusters and then sends jobs to each engine


    n_pixels_per_process: [optional] int
        number of pixels to be processed by each thread


    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'

    expandCore: [optional]  scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion


    Returns
    --------
    A: np.ndarray
         new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray
         temporal components (updated only when spatial components are completely removed)

    """
    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1),
                                       2).astype(int)

    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')

    if Y.ndim < 2 and not type(Y) is str:
        Y = np.atleast_2d(Y)

    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.'
        )

    nr, _ = np.shape(C)  # number of neurons

    IND = determine_search_location(A_in,
                                    d1,
                                    d2,
                                    method=method,
                                    min_size=min_size,
                                    max_size=max_size,
                                    dist=dist,
                                    expandCore=expandCore)
    print " find search location"

    ind2_ = [
        np.hstack(
            (np.where(iid_)[0], nr +
             np.arange(f.shape[0]))) if np.size(np.where(iid_)[0]) > 0 else []
        for iid_ in IND
    ]

    folder = tempfile.mkdtemp()

    # use the ipyparallel package, you need to start a cluster server
    # (ipcluster command) in order to use it
    if backend == 'ipyparallel':

        C_name = os.path.join(folder, 'C_temp.npy')
        np.save(C_name, Cf)

        # if the input file is already memory mapped then reuse its filename
        if type(Y) is np.core.memmap:
            Y_name = Y.filename
        # if not create a memory mapped version (necessary for parallelization)
        elif type(Y) is str:
            Y_name = Y
        else:
            Y_name = os.path.join(folder, 'Y_temp.npy')
            np.save(Y_name, Y)
            Y, _, _, _ = load_memmap(Y_name)

        # create arguments to be passed to the function. Here we are grouping
        # bunch of pixels to be processed by each thread
        pixel_groups = [(Y_name, C_name, sn, ind2_,
                         range(i, i + n_pixels_per_process))
                        for i in range(0, d1 * d2 - n_pixels_per_process +
                                       1, n_pixels_per_process)]

        A_ = np.zeros((d, nr + np.size(f, 0)))

        try:  # if server is not running and raise exception if not installed or not started
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise

        if len(c) < n_processes:
            print len(c)
            raise Exception(
                "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
            )

        dview = c[:n_processes]  # use the number of processes
        #serial_result = map(lars_regression_noise_ipyparallel, pixel_groups)
        parallel_result = dview.map_sync(lars_regression_noise_ipyparallel,
                                         pixel_groups)
        # clean up

        for chunk in parallel_result:
            for pars in chunk:
                px, idxs_, a = pars
                A_[px, idxs_] = a

        dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    elif backend == 'single_thread':

        Cf_ = [Cf[idx_, :] for idx_ in ind2_]

        #% LARS regression
        A_ = np.hstack((np.zeros((d, nr)), np.zeros((d, np.size(f, 0)))))

        for c, y, s, id2_, px in zip(Cf_, Y, sn, ind2_, range(d)):
            if px % 1000 == 0:
                print px
            if np.size(c) > 0:
                _, _, a, _, _ = lars_regression_noise(y, np.array(c.T), 1,
                                                      sn[px]**2 * T)
                if np.isscalar(a):
                    A_[px, id2_] = a
                else:
                    A_[px, id2_] = a.T

    else:
        raise Exception(
            'Unknown backend specified: use single_thread or ipyparallel'
        )

    #%
    print 'Updated Spatial Components'

    A_ = threshold_components(A_, d1, d2)

    print "threshold"
    ff = np.where(np.sum(A_, axis=0) == 0)  # remove empty components
    if np.size(ff) > 0:
        ff = ff[0]
        print('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_, list(ff), 1)
        C = np.delete(C, list(ff), 0)

    A_ = A_[:, :nr]
    A_ = coo_matrix(A_)

    #    import pdb
    #    pdb.set_trace()
    Y_resf = np.dot(Y, f.T) - A_.dot(coo_matrix(C[:nr, :]).dot(f.T))
    print "Computing A_bas"
    A_bas = np.fmax(Y_resf / scipy.linalg.norm(f)**2,
                    0)  # update baseline based on residual
    # A_bas = np.fmax(np.dot(Y_res,f.T)/scipy.linalg.norm(f)**2,0) # update
    # baseline based on residual
    b = A_bas

    print("--- %s seconds ---" % (time.time() - start_time))

    try:  # clean up
        # remove temporary file created
        print "Remove temporary file created"
        shutil.rmtree(folder)

    except:

        raise Exception("Failed to delete: " + folder)

    return A_, b, C
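# A minimal usage sketch of update_spatial_components above, on synthetic data.
# All shapes and values here are made up for illustration; whether the solver
# returns anything meaningful on random inputs is beside the point -- the sketch
# only shows the calling convention and the single_thread backend.
import numpy as np

d1, d2, T, K = 20, 20, 100, 3                 # hypothetical field of view and movie length
Y = np.random.rand(d1 * d2, T)                # pixels x time
C = np.random.rand(K, T)                      # neurons x time
f = np.random.rand(1, T)                      # background temporal profile
A_in = np.random.rand(d1 * d2, K)             # initial spatial footprints
sn = np.std(Y, axis=1)                        # crude per-pixel noise estimate

A, b, C_new = update_spatial_components(Y, C, f, A_in, sn=sn, d1=d1, d2=d2,
                                        backend='single_thread',
                                        n_pixels_per_process=64)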
Exemple #13
0
def update_temporal_components(Y,
                               A,
                               b,
                               Cin,
                               fin,
                               bl=None,
                               c1=None,
                               g=None,
                               sn=None,
                               ITER=2,
                               method_foopsi='constrained_foopsi',
                               n_processes=1,
                               backend='single_thread',
                               memory_efficient=False,
                               debug=False,
                               **kwargs):
    """Update temporal components and background given spatial components using a block coordinate descent approach.
    
    Parameters
    -----------    

    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix (csc format)
        matrix of spatial components (d x K)
    b: ndarray (d x 1)
        current estimate of the spatial background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)   
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    g:  np.ndarray
        Global time constant (not used)
    bl: np.ndarray
       baseline for fluorescence trace for each column in A
    c1: np.ndarray
       initial concentration for each column in A
    g:  np.ndarray       
       discrete time constant for each column in A
    sn: np.ndarray
       noise level for each column in A       
    ITER: positive integer
        Maximum number of block coordinate descent loops. 
    method_foopsi: string
        Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment.               
    n_processes: int
        number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster.
    backend: str
        single_thread: no parallelization
        ipyparallel: parallelization using an ipyparallel cluster. You should start the cluster first (install ipyparallel and then type
        ipcluster start -n 6, where 6 is the number of processes).
    memory_efficient: Bool
        whether or not to optimize for memory usage (longer running times). Necessary with very large datasets.
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl, c1, g, sn (see its documentation). Some useful parameters are:
    p: int
        order of the autoregression model
    method: [optional] string
        solution method for constrained foopsi. Choices are
            'cvx':      using cvxopt and picos (slow especially without the MOSEK solver)
            'cvxpy':    using cvxopt and cvxpy with the ECOS solver (faster, default)
    
    solvers: list of strings
            primary and secondary solvers to be used with cvxpy (the secondary is tried if the problem is infeasible for an approximate solution); default is ['ECOS','SCS']
            
    Note
    --------

    The temporal components are updated in parallel by default by forming a sequence of vertex covers.
    
    Returns
    --------
    
    C:   np.ndarray
            matrix of temporal components (K x T)
    f:   np.array
            vector of temporal background (length T) 
    S:   np.ndarray            
            matrix of merged deconvolved activity (spikes) (K x T)
    bl:  float  
            same as input    
    c1:  float
            same as input    
    g:   float
            same as input    
    sn:  float
            same as input 
    YrA: np.ndarray
            matrix of spatial component filtered raw data, after all contributions have been removed.            
            YrA corresponds to the residual trace for each component and is used for faster plotting (K x T)
    """
    if not kwargs.has_key('p') or kwargs['p'] is None:
        raise Exception("You have to provide a value for p")

    d, T = np.shape(Y)
    nr = np.shape(A)[-1]

    if bl is None:
        bl = np.repeat(None, nr)

    if c1 is None:
        c1 = np.repeat(None, nr)

    if g is None:
        g = np.repeat(None, nr)

    if sn is None:
        sn = np.repeat(None, nr)

    A = scipy.sparse.hstack((A, coo_matrix(b)))
    S = np.zeros(np.shape(Cin))
    Cin = np.vstack((Cin, fin))
    C = Cin
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0)))
    #import pdb
    #pdb.set_trace()
    Cin = coo_matrix(Cin)
    #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))
    YA = (A.T.dot(Y).T) * spdiags(1. / nA, 0, nr + 1, nr + 1)
    AA = ((A.T.dot(A)) * spdiags(1. / nA, 0, nr + 1, nr + 1)).tocsr()
    YrA = YA - Cin.T.dot(AA)
    #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))*spdiags(1./nA,0,nr+1,nr+1)

    if backend == 'ipyparallel':
        try:  # if server is not running and raise exception if not installed or not started
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise

        if len(c) < n_processes:
            print len(c)
            raise Exception(
                "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value"
            )

        dview = c[:n_processes]  # use the number of processes

    Cin = np.array(Cin.todense())
    for iter in range(ITER):
        O, lo = update_order(A.tocsc()[:, :nr])
        P_ = []
        for count, jo_ in enumerate(O):
            jo = np.array(list(jo_))
            #Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T
            Ytemp = YrA[:, jo.flatten()] + Cin[jo, :].T
            Ctemp = np.zeros((np.size(jo), T))
            Stemp = np.zeros((np.size(jo), T))
            btemp = np.zeros((np.size(jo), 1))
            sntemp = btemp.copy()
            c1temp = btemp.copy()
            gtemp = np.zeros((np.size(jo), kwargs['p']))
            nT = nA[jo]

            #            args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))]
            args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj, None,
                        None, None, None, kwargs) for jj in range(len(jo))]
            #            import pdb
            #            pdb.set_trace()
            if backend == 'ipyparallel':
                #
                if debug:
                    results = dview.map_async(constrained_foopsi_parallel,
                                              args_in)
                    results.get()
                    for outp in results.stdout:
                        print outp[:-1]
                        sys.stdout.flush()
                    for outp in results.stderr:
                        print outp[:-1]
                        sys.stderr.flush()

                else:

                    results = dview.map_sync(constrained_foopsi_parallel,
                                             args_in)

            elif backend == 'single_thread':

                results = map(constrained_foopsi_parallel, args_in)

            else:

                raise Exception(
                    'Backend not defined. Use either single_thread or ipyparallel'
                )

            for chunk in results:
                pars = dict()
                C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk
                Ctemp[jj_, :] = C_[None, :]

                Stemp[jj_, :] = Sp_
                Ytemp[:, jj_] = Ytemp_[:, None]
                btemp[jj_] = cb_
                c1temp[jj_] = c1_
                sntemp[jj_] = sn_
                gtemp[jj_, :] = gn_.T

                bl[jo[jj_]] = cb_
                c1[jo[jj_]] = c1_
                sn[jo[jj_]] = sn_
                g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else []  #gtemp[jj,:]

                pars['b'] = cb_
                pars['c1'] = c1_
                pars['neuron_sn'] = sn_
                pars['gn'] = gtemp[jj_, np.abs(gtemp[jj_, :]) > 0]  # note: jj_ (not jj) indexes the current component
                pars['neuron_id'] = jo[jj_]
                P_.append(pars)

            YrA -= (Ctemp - C[jo, :]).T * AA[jo, :]
            #YrA[:,jo] = Ytemp
            C[jo, :] = Ctemp.copy()
            S[jo, :] = Stemp

            #            if (np.sum(lo[:jo])+1)%1 == 0:
            print str(np.sum(lo[:count + 1])) + ' out of total ' + str(
                nr) + ' temporal components updated'

        ii = nr

        #YrA[:,ii] = YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T
        #cc = np.maximum(YrA[:,ii],0)
        cc = np.maximum(YrA[:, ii] + np.atleast_2d(Cin[ii, :]).T, 0)
        YrA -= (cc - np.atleast_2d(Cin[ii, :]).T) * AA[ii, :]
        C[ii, :] = cc.T
        #YrA = YA - C.T.dot(AA)
        #YrA[:,ii] = YrA[:,ii] - np.atleast_2d(C[ii,:]).T

        if backend == 'ipyparallel':
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()

        if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm(
                C, 'fro') <= 1e-3:
            # stop if the overall temporal component does not change by much
            print "stopping: overall temporal component not changing significantly"
            break
        else:
            Cin = C

    f = C[nr:, :]
    C = C[:nr, :]
    YrA = np.array(YrA[:, :nr]).T
    P_ = sorted(P_, key=lambda k: k['neuron_id'])
    if backend == 'ipyparallel':
        c.close()

    return C, f, S, bl, c1, sn, g, YrA  #,P_
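# The block coordinate descent loop above stops when the relative Frobenius-norm
# change of the stacked temporal matrix drops below 1e-3. A standalone sketch of
# that stopping criterion on made-up arrays (illustrative only):
import numpy as np
import scipy.linalg

def temporal_update_converged(C_prev, C_new, tol=1e-3):
    # relative change of the temporal components between two sweeps
    return scipy.linalg.norm(C_prev - C_new, 'fro') / scipy.linalg.norm(C_new, 'fro') <= tol

C_prev = np.random.rand(5, 100)
C_new = C_prev + 1e-6 * np.random.randn(5, 100)   # a negligible update
print(temporal_update_converged(C_prev, C_new))   # True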
m = T.shape[0]
percentiles = list(arange(0.5, 100.0, 0.5))
rng = RandomState(0)
seeds = rng.random_integers(0, 2 ** 31 - 2, size=EX_NUM)

for tr in trends:
    results = zeros((len(percentiles), len(T), EX_NUM)) * nan
    filename = 'adf_z_' + tr + '.npz'

    for i in range(EX_NUM):
        print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
        # Non parallel version
        # out = lmap(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
        now = datetime.datetime.now()
        out = lview.map_sync(wrapper,
                             T,
                             [tr] * m, [EX_SIZE] * m,
                             [seeds[i]] * m)
        # Prevent unnecessary results from accumulating
        lview.purge_results('all')
        rc.purge_everything()
        print(datetime.datetime.now() - now)
        quantiles = lmap(lambda x: percentile(x, percentiles), out)
        results[:, :, i] = array(quantiles).T

        if i % 50 == 0:
            savez(filename, trend=tr, results=results,
                  percentiles=percentiles, T=T)

    savez(filename, trend=tr, results=results, percentiles=percentiles, T=T)
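# The critical-value simulation above relies on an ipyparallel client ('rc') and a
# load-balanced view ('lview') created earlier in the script, together with a
# 'wrapper' simulation function. A minimal, hypothetical setup compatible with the
# calls it makes (map_sync, purge_results, purge_everything) could look like this;
# the wrapper body is a placeholder, not the actual simulation.
from ipyparallel import Client

rc = Client()                       # connect to a running ipcluster
lview = rc.load_balanced_view()     # distribute jobs dynamically across engines

def wrapper(t, trend, ex_size, seed):
    # placeholder: the real function runs one unit-root experiment per sample size
    return (t, trend, ex_size, seed)

out = lview.map_sync(wrapper, [100, 250], ['c', 'c'], [2000, 2000], [0, 1])
lview.purge_results('all')
rc.purge_everything()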
def extract_rois_patch(file_name,d1,d2,rf=5,stride = 2):
    # not_completed, in_progress  (left-over status note; the bare names would raise a NameError if executed)
    rf=6
    stride = 2
    idx_flat,idx_2d=extract_patch_coordinates(d1, d2, rf=rf,stride = stride)
    perctl=95
    n_components=2
    tol=1e-6
    max_iter=5000
    args_in=[]    
    for id_f,id_2d in zip(idx_flat,idx_2d):        
        args_in.append((file_name, id_f,id_2d[0].shape, perctl,n_components,tol,max_iter))
    st=time.time()
    try:
        if 1:
            c = Client()   
            dview=c[:]
            file_res = dview.map_sync(nmf_patches, args_in)                         
        else:
            file_res = map(nmf_patches, args_in)                         
    finally:
        dview.results.clear()   
        c.purge_results('all')
        c.purge_everything()
        c.close()
    
    print time.time()-st
    
    A1=lil_matrix((d1*d2,len(file_res)))
    C1=[]
    A2=lil_matrix((d1*d2,len(file_res)))
    C2=[]
    A_tot=lil_matrix((d1*d2,n_components*len(file_res)))
    C_tot=[];
    count_out=0
    for count,f in enumerate(file_res):
        idx_,flt,ca,d=f
        print count_out
        #flt,ca,_=cse.order_components(coo_matrix(flt),ca)
        
#        A1[idx_,count]=flt[:,0][:,np.newaxis]/np.sqrt(np.sum(flt[:,0]**2))      
#        A2[idx_,count]=flt[:,1][:,np.newaxis] /np.sqrt(np.sum(flt[:,1]**2))              
#        C1.append(ca[0,:])
#        C2.append(ca[1,:])
        for ccc in range(n_components):
            A_tot[idx_,count_out]=flt[:,ccc][:,np.newaxis]/np.sqrt(np.sum(flt[:,ccc]**2))      
            C_tot.append(ca[ccc,:])
            count_out+=1
#        pl.imshow(np.reshape(flt[:,0],d,order='F'),vmax=10)
#        pl.pause(.1)
        
    correlations=np.corrcoef(np.array(C_tot))
    centers=cse.com(A_tot.todense(),d1,d2)
    distances=sklearn.metrics.pairwise.euclidean_distances(centers)
    pl.imshow((correlations>0.8) & (distances<10))  
    
    Yr=np.load('Yr.npy',mmap_mode='r')
    [d,T]=Yr.shape
    Y=np.reshape(Yr,(d1,d2,T),order='F')
    options=cse.utilities.CNMFSetParms(Y,p=0)    
    res_merge=cse.merge_components(Yr,A_tot,[],np.array(C_tot),[],np.array(C_tot),[],options['temporal_params'],options['spatial_params'],thr=0.8)
    A_m,C_m,nr_m,merged_ROIs,S_m,bl_m,c1_m,sn_m,g_m=res_merge
    A_norm=np.array([A_m[:,rr].toarray()/np.sqrt(np.sum(A_m[:,rr].toarray()**2)) for rr in range(A_m.shape[-1])]).T
    
    options=cse.utilities.CNMFSetParms(Y,p=2,K=np.shape(A_m)[-1])   
    
    Yr,sn,g=cse.pre_processing.preprocess_data(Yr,**options['preprocess_params'])
    
    epsilon=1e-2
    pixels_bckgrnd=np.nonzero(A_norm.sum(axis=-1)<epsilon)[0]
    f=np.sum(Yr[pixels_bckgrnd,:],axis=0)
    A2,b2,C2 = cse.spatial.update_spatial_components(Yr, C_m, f, A_m, sn=sn, **options['spatial_params'])
    A_or2, C_or2, srt2 = cse.utilities.order_components(A2,C2)
    A_norm2=np.array([A_or2[:,rr]/np.sqrt(np.sum(A_or2[:,rr]**2)) for rr in range(A_or2.shape[-1])]).T
    options['temporal_params']['p'] = 2 # set it back to original value to perform full deconvolution
    C2,f2,S2,bl2,c12,neurons_sn2,g21,YrA = cse.temporal.update_temporal_components(Yr,A2,b2,C2,f,bl=None,c1=None,sn=None,g=None,**options['temporal_params'])
    A_or, C_or, srt = cse.utilities.order_components(A2,C2)
    
    return A1,A2,C1
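# The merge heuristic used above flags pairs of patch components whose temporal
# traces correlate above 0.8 and whose spatial centers lie within 10 pixels of
# each other. A self-contained sketch of that mask on toy traces and centers
# (all names and values below are illustrative):
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

C_tot = np.random.rand(4, 200)                                   # 4 toy temporal traces
centers = np.array([[5.0, 5.0], [5.5, 5.2], [20.0, 20.0], [40.0, 3.0]])

correlations = np.corrcoef(C_tot)
distances = euclidean_distances(centers)

merge_candidates = (correlations > 0.8) & (distances < 10)
np.fill_diagonal(merge_candidates, False)      # ignore self-pairs
print(np.argwhere(merge_candidates))           # index pairs proposed for merging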
def update_spatial_components(Y, C, f, A_in, sn=None, d1=None, d2=None, min_size=3, max_size=8, dist=3, 
                              method='ellipse', expandCore=None, backend='single_thread', n_processes=4, n_pixels_per_process=128 ):
    """update spatial footprints and background through Basis Pursuit Denoising

    for each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components

    Parameters
    ----------
    Y: np.ndarray (2D)
        movie, raw data in 2D (pixels x time).
    C: np.ndarray
        calcium activity of each neuron.
    f: np.ndarray
        temporal profile  of background activity.
    A_in: np.ndarray
        current estimate of the spatial footprints (pixels x neurons); used to determine the pixel search locations.

    d1: [optional] int
        x movie dimension

    d2: [optional] int
        y movie dimension

    min_size: [optional] int

    max_size: [optional] int

    dist: [optional] int


    sn: [optional] float
        noise associated with each pixel if known

    n_processes: [optional] int
        number of processes to use when the backend is ipyparallel

    backend: [optional] str
        'ipyparallel', 'single_thread'
        single_thread: no parallelization; suitable for small datasets.
        ipyparallel: uses an ipython cluster and sends jobs to its engines


    n_pixels_per_process: [optional] int
        number of pixels to be processed by each thread


    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'

    expandCore: [optional]  scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion


    Returns
    --------
    A: np.ndarray
         new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray
         temporal components (updated only when spatial components are completely removed)

    """
    if expandCore is None:
        expandCore = iterate_structure(generate_binary_structure(2, 1), 2).astype(int)

    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')
    
    if Y.ndim<2 and not type(Y) is str:
        Y = np.atleast_2d(Y)
        
    if Y.shape[1] == 1:
        raise Exception('Dimension of Matrix Y must be pixels x time')

    C = np.atleast_2d(C)
    if C.shape[1] == 1:
        raise Exception('Dimension of Matrix C must be neurons x time')

    f = np.atleast_2d(f)
    if f.shape[1] == 1:
        raise Exception('Dimension of Matrix f must be neurons x time ')

    if len(A_in.shape) == 1:
        A_in = np.atleast_2d(A_in).T

    if A_in.shape[0] == 1:
        raise Exception('Dimension of Matrix A must be pixels x neurons ')

    start_time = time.time()

    Cf = np.vstack((C, f))  # create matrix that include background components

    [d, T] = np.shape(Y)    

    if n_pixels_per_process > d:
        raise Exception(
            'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.')

    nr, _ = np.shape(C)       # number of neurons
    
    IND = determine_search_location(
        A_in, d1, d2, method=method, min_size=min_size, max_size=max_size, dist=dist, expandCore=expandCore)
    print " find search location"


    ind2_ = [np.hstack((np.where(iid_)[0], nr + np.arange(f.shape[0])))
             if np.size(np.where(iid_)[0]) > 0 else [] for iid_ in IND]

    folder = tempfile.mkdtemp()

    # use the ipyparallel package, you need to start a cluster server
    # (ipcluster command) in order to use it
    if backend == 'ipyparallel':

        C_name = os.path.join(folder, 'C_temp.npy')
        np.save(C_name, Cf)

        if type(Y) is np.core.memmap:  # if input file is already memory mapped then find the filename
            Y_name = Y.filename            
        # if not create a memory mapped version (necessary for parallelization)
        elif type(Y) is str:
            Y_name = Y            
        else:
            Y_name = os.path.join(folder, 'Y_temp.npy')
            np.save(Y_name, Y)            
            Y,_,_,_=load_memmap(Y_name)    

        # create arguments to be passed to the function. Here we are grouping
        # bunch of pixels to be processed by each thread
        pixel_groups = [(Y_name, C_name, sn, ind2_, range(i, i + n_pixels_per_process))
                        for i in range(0, d1 * d2 - n_pixels_per_process + 1, n_pixels_per_process)]

        A_ = np.zeros((d, nr + np.size(f, 0)))
    
        try:  # if server is not running and raise exception if not installed or not started
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise

        if len(c) < n_processes:
            print len(c)
            raise Exception(
                "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value")

        dview = c[:n_processes]  # use the number of processes
        #serial_result = map(lars_regression_noise_ipyparallel, pixel_groups)                        
        parallel_result = dview.map_sync(lars_regression_noise_ipyparallel, pixel_groups)
        # clean up
       
        
        for chunk in parallel_result:
            for pars in chunk:
                px, idxs_, a = pars
                A_[px, idxs_] = a
        
        dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    elif backend == 'single_thread':

        Cf_ = [Cf[idx_, :] for idx_ in ind2_]

        #% LARS regression
        A_ = np.hstack((np.zeros((d, nr)), np.zeros((d, np.size(f, 0)))))

        for c, y, s, id2_, px in zip(Cf_, Y, sn, ind2_, range(d)):
            if px % 1000 == 0:
                print px
            if np.size(c) > 0:
                _, _, a, _, _ = lars_regression_noise(y, np.array(c.T), 1, sn[px]**2 * T)
                if np.isscalar(a):
                    A_[px, id2_] = a
                else:
                    A_[px, id2_] = a.T

    else:
        raise Exception(
            'Unknown backend specified: use single_thread or ipyparallel')
    
    #%
    print 'Updated Spatial Components'
   
    A_ = threshold_components(A_, d1, d2)

    print "threshold"
    ff = np.where(np.sum(A_, axis=0) == 0)           # remove empty components
    if np.size(ff) > 0:
        ff = ff[0]
        print('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_, list(ff), 1)
        C = np.delete(C, list(ff), 0)
    

    A_ = A_[:, :nr]
    A_ = coo_matrix(A_)
    
#    import pdb 
#    pdb.set_trace()
    Y_resf = np.dot(Y, f.T) - A_.dot(coo_matrix(C[:nr, :]).dot(f.T))
    print "Computing A_bas"
    A_bas = np.fmax(Y_resf / scipy.linalg.norm(f)**2, 0)  # update baseline based on residual
    # A_bas = np.fmax(np.dot(Y_res,f.T)/scipy.linalg.norm(f)**2,0) # update
    # baseline based on residual
    b = A_bas

    print("--- %s seconds ---" % (time.time() - start_time))

    try:  # clean up
        # remove temporary file created
        print "Remove temporary file created"
        shutil.rmtree(folder)

    except:

        raise Exception("Failed to delete: " + folder)

    return A_, b, C
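# For every pixel, the spatial update above solves a sparse, nonnegative regression of
# the pixel trace onto the temporal components, with the residual bounded by the pixel
# noise level (see the docstring). The library does this with its own
# lars_regression_noise; the sketch below only illustrates the idea using
# scikit-learn's penalized Lasso as a stand-in (penalized rather than constrained
# form, so it is a rough surrogate, not the actual solver).
import numpy as np
from sklearn.linear_model import Lasso

T, K = 200, 3
Cf = np.abs(np.random.rand(K, T))            # temporal components (plus background)
a_true = np.array([0.8, 0.0, 0.3])           # ground-truth footprint weights for one pixel
y = a_true.dot(Cf) + 0.01 * np.random.randn(T)

model = Lasso(alpha=1e-3, positive=True, fit_intercept=False)
model.fit(Cf.T, y)                           # design matrix is time x components
print(model.coef_)                           # sparse, nonnegative weights for this pixel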
def update_spatial_components_parallel(Y,C,f,A_in,sn=None, d1=None,d2=None,min_size=3,max_size=8, dist=3, method = 'ellipse', expandCore = None,backend='single_thread',n_processes=4,n_pixels_per_process=128, memory_efficient=False):
    """update spatial footprints and background     
    through Basis Pursuit Denoising

    for each pixel i solve the problem 
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to 
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);
    
    for each pixel the search is limited to a few spatial components
    
    Parameters
    ----------   
    Y: np.ndarray (2D)
        movie, raw data in 2D (pixels x time).
    C: np.ndarray
        calcium activity of each neuron. 
    f: np.ndarray
        temporal profile  of background activity.
    A_in: np.ndarray
        current estimate of the spatial footprints (pixels x neurons); used to determine the pixel search locations.
        
    d1: [optional] int
        x movie dimension
        
    d2: [optional] int
        y movie dimension

    min_size: [optional] int
                
    max_size: [optional] int
                
    dist: [optional] int
        
        
    sn: [optional] float
        noise associated with each pixel if known
        
    n_processes: [optional] int
        number of worker processes or threads to use when the backend is multiprocessing, threading, or ipyparallel

    backend: [optional] str
        'multiprocessing', 'threading', 'ipyparallel', 'single_thread'
        single_thread: no parallelization. It should be used in most cases.
        multiprocessing or threading: use the corresponding python package. Known issues on mac OS; not to be used in most situations.
        ipyparallel: uses a running ipython cluster and sends jobs to its engines
        
    
    n_pixels_per_process: [optional] int
        number of pixels to be processed by each thread 
    
    memory_efficient [bool]
        whether or not to reduce memory usage (at the expense of increased computational time)
            
    method: [optional] string
        method used to expand the search for pixels 'ellipse' or 'dilate'
        
    expandCore: [optional]  scipy.ndimage.morphology
        if method is dilate this represents the kernel used for expansion


    Returns
    --------    
    A: np.ndarray        
         new estimate of spatial footprints
    b: np.ndarray
        new estimate of spatial background
    C: np.ndarray        
         temporal components (updated only when spatial components are completely removed)             
       
    """

        
    
    if expandCore is None:
        expandCore=iterate_structure(generate_binary_structure(2,1), 2).astype(int)
    
    if d1 is None or d2 is None:
        raise Exception('You need to define the input dimensions')
    
    Y=np.atleast_2d(Y)
    if Y.shape[1]==1:
        raise Exception('Dimension of Matrix Y must be pixels x time')
    
    C=np.atleast_2d(C)
    if C.shape[1]==1:
        raise Exception('Dimension of Matrix C must be neurons x time')
    
    f=np.atleast_2d(f)
    if f.shape[1]==1:
         raise Exception('Dimension of Matrix f must be neurons x time ')
        
    if len(A_in.shape)==1:
        A_in=np.atleast_2d(A_in).T

    if A_in.shape[0]==1:
         raise Exception('Dimension of Matrix A must be pixels x neurons ')
    
    start_time = time.time()
    
    Cf = np.vstack((C,f)) # create matrix that include background components
        
    [d,T] = np.shape(Y)
    
    if n_pixels_per_process > d:
        raise Exception('The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.')

    nr,_ = np.shape(C)       # number of neurons
    
    IND = determine_search_location(A_in,d1,d2,method = method, min_size = min_size, max_size = max_size, dist = dist, expandCore = expandCore)
    print " find search location"
    
    ind2_ =[ np.hstack( (np.where(iid_)[0] , nr+np.arange(f.shape[0])) )   if  np.size(np.where(iid_)[0])>0  else [] for iid_ in IND]


    folder = tempfile.mkdtemp()
    
    if backend == 'multiprocessing' or backend == 'threading':

        A_name = os.path.join(folder, 'A_temp')  
                      
        # Pre-allocate a writeable shared memory map as a container for the
        # results of the parallel computation     
        print "Create Matrix for dumping data from matrix A and C for parallel computation...."              
        A_ = np.memmap(A_name, dtype=A_in.dtype,shape=(d,nr+np.size(f,0)), mode='w+') 

        pixels_name = os.path.join(folder, 'pixels')

        C_name = os.path.join(folder, 'C_temp')          
        
        # Dump the input data to disk to free the memory
        dump(Y, pixels_name)
        dump(Cf, C_name)        
        
        # use memory mapped versions of C and Y
        Y = load(pixels_name, mmap_mode='r')
        Cf = load(C_name, mmap_mode='r')
        
        pixel_groups=[range(i,i+n_pixels_per_process) for i in range(0,Y.shape[0]-n_pixels_per_process+1,n_pixels_per_process)]
        
        # Fork the worker processes to perform computation concurrently    
        print "start parallel pool..."
        sys.stdout.flush()
        Parallel(n_jobs=n_processes, backend=backend,verbose=100,max_nbytes=None)(delayed(lars_regression_noise_parallel)(Y,Cf,A_,sn,i,ind2_)
                            for i in pixel_groups) 
                        
            
        # if n_pixels_per_process is not a multiple of Y.shape[0] run on remaining pixels   
        pixels_remaining= Y.shape[0] %  n_pixels_per_process          
        if pixels_remaining>0:             
            print "Running deconvolution for remaining pixels:" + str(pixels_remaining)
            lars_regression_noise_parallel(Y,Cf,A_,sn,range(Y.shape[0]-pixels_remaining,Y.shape[0]),ind2_,positive=1)        
        A_=np.array(A_)
       
    elif backend == 'ipyparallel': # use the ipyparallel package, you need to start a cluster server (ipcluster command) in order to use it
              
        C_name = os.path.join(folder, 'C_temp.npy')        
        np.save(C_name,Cf)

        if type(Y) is np.core.memmap: # if input file is already memory mapped then find the filename 
            Y_name=Y.filename
        else:                        # if not create a memory mapped version (necessary for parallelization) 
            Y_name = os.path.join(folder, 'Y_temp.npy') 
            np.save(Y_name,Y)   
            Y=np.load(Y_name,mmap_mode='r') 
            
        # create arguments to be passed to the function. Here we are grouping bunch of pixels to be processed by each thread    
        pixel_groups=[(Y_name,C_name,sn,ind2_,range(i,i+n_pixels_per_process)) for i in range(0,d1*d2-n_pixels_per_process+1,n_pixels_per_process)]

        A_ = np.zeros((d,nr+np.size(f,0)))
        try: # if server is not running and raise exception if not installed or not started        
            from ipyparallel import Client
            c = Client()
        except:
            print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and  starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes"
            raise
        
        if len(c) <  n_processes:
            print len(c)
            raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value")            
            
        dview=c[:n_processes] # use the number of processes
        #serial_result = map(lars_regression_noise_ipyparallel, pixel_groups)
        parallel_result = dview.map_sync(lars_regression_noise_ipyparallel, pixel_groups) 
        for chunk in parallel_result:
            for pars in chunk:
                px,idxs_,a=pars
                A_[px,idxs_]=a
        #clean up        
        dview.results.clear()   
        c.purge_results('all')
        c.purge_everything()
        c.close()
        
        
             
    elif backend=='single_thread':      

        Cf_=[Cf[idx_,:] for idx_ in ind2_]

        #% LARS regression 
        A_ = np.hstack((np.zeros((d,nr)),np.zeros((d,np.size(f,0)))))
        
        
        for c,y,s,id2_,px in zip(Cf_,Y,sn,ind2_,range(d)):
            if px%1000==0: 
                    print px
            if np.size(c)>0:                
                _, _, a, _ , _= lars_regression_noise(y, np.array(c.T), 1, sn[px]**2*T)
                if np.isscalar(a):
                    A_[px,id2_]=a
                else:
                    A_[px,id2_]=a.T
        
    else:
        raise Exception('Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel')
        
    #%
    print 'Updated Spatial Components'
    A_=threshold_components(A_, d1, d2)
    print "threshold"
    ff = np.where(np.sum(A_,axis=0)==0);           # remove empty components
    if np.size(ff)>0:
        ff = ff[0]
        warn('eliminating empty components!!')
        nr = nr - len(ff)
        A_ = np.delete(A_,list(ff),1)
        C = np.delete(C,list(ff),0)
    
    A_ = A_[:,:nr]                
    A_=coo_matrix(A_)

    if memory_efficient:
        print "Using memory efficient computation (slow but memory preserving)"
        A__=coo_matrix(A_,dtype=np.float32)
        C__=coo_matrix(C[:nr,:],dtype=np.float32)
        Y_res_name = os.path.join(folder, 'Y_res_temp.npy')
        Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='w+', shape=Y.shape)
        Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='r+', shape=Y.shape)
        print "computing residuals"        
        Y_res[:] = -A__.dot(C__).todense()[:]
        Y_res[:]+=Y
    else:   
        print "Using memory trade-off computation (good use of memory if input is memmaped)"         
        Y_res = Y - A_.dot(coo_matrix(C[:nr,:]))


    print "Computing A_bas"         
    A_bas = np.fmax(np.dot(Y_res,f.T)/scipy.linalg.norm(f)**2,0) # update baseline based on residual
    Y_res[:]=1
    b = A_bas
    
    print("--- %s seconds ---" % (time.time() - start_time))
    
    try: #clean up
        # remove temporary file created
        print "Remove temporary file created"
        shutil.rmtree(folder)

    except:
        
        raise Exception("Failed to delete: " + folder)
        
    return A_,b,C
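# The multiprocessing/threading branch above spills Y and Cf to disk with joblib's
# dump/load and fans the pixel blocks out with Parallel/delayed. A minimal
# standalone sketch of that pattern; the worker here is a placeholder, not
# lars_regression_noise_parallel.
import os
import shutil
import tempfile
import numpy as np
from joblib import Parallel, delayed, dump, load

folder = tempfile.mkdtemp()
pixels_name = os.path.join(folder, 'pixels')

Y = np.random.rand(1000, 50)
dump(Y, pixels_name)                         # spill the array to disk
Y_mm = load(pixels_name, mmap_mode='r')      # workers share it as a read-only memmap

def block_mean(block, arr):
    # placeholder worker operating on one block of pixel rows
    return arr[list(block), :].mean(axis=1)

pixel_groups = [range(i, i + 100) for i in range(0, 1000, 100)]
res = Parallel(n_jobs=2)(delayed(block_mean)(b, Y_mm) for b in pixel_groups)
print(len(res))                              # one result per pixel block

shutil.rmtree(folder)                        # clean up the temporary folder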
def run_CNMF_patches(file_name, shape, options, rf=16, stride = 4, n_processes=2, backend='single_thread'):
    """
    Function that runs CNMF in patches, either in parallel or sequentially, and returns the result for each patch. The ipyparallel cluster must already be running when backend='ipyparallel'.
        
    Parameters
    ----------        
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie        
        
    shape: tuple of three elements
        dimensions of the original movie across y, x, and time 
    
    options:
        dictionary containing all the parameters for the various algorithms
    
    rf: int 
        half-size of the square patch in pixels
    
    stride: int
        amount of overlap between patches
        
    backend: string
        'ipyparallel' or 'single_thread'
    
    
    Returns
    -------
    A_tot:
        sparse matrix (d x total number of components) with the spatial components found in all patches

    C_tot:
        matrix (total number of components x T) with the corresponding temporal components

    sn_tot:
        per-pixel noise estimate over the whole field of view

    optional_outputs:
        dictionary with additional per-patch outputs (backgrounds, baselines, time constants, noise levels, patch indices and shapes)
    """
    (d1,d2,T)=shape
    d=d1*d2
    K=options['init_params']['K']
    
    idx_flat,idx_2d=extract_patch_coordinates(d1, d2, rf=rf, stride = stride)
    
    args_in=[]    
    for id_f,id_2d in zip(idx_flat[:],idx_2d[:]):        
        args_in.append((file_name, id_f,id_2d[0].shape, options))

    print len(idx_flat)

    st=time.time()        
    
    if backend == 'ipyparallel':

        try:

            c = Client()   
            dview=c[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)        

        finally:
            
            dview.results.clear()   
            c.purge_results('all')
            c.purge_everything()
            c.close()                   

    elif backend == 'single_thread':

        file_res = map(cnmf_patches, args_in)                         

    else:
        raise Exception('Backend unknown')
            
      
    print time.time()-st
    
    
    # extract the values from the output of mapped computation
    num_patches=len(file_res)
    
    A_tot=scipy.sparse.csc_matrix((d,K*num_patches))
    C_tot=np.zeros((K*num_patches,T))
    sn_tot=np.zeros((d1*d2))
    b_tot=[]
    f_tot=[]
    bl_tot=[]
    c1_tot=[]
    neurons_sn_tot=[]
    g_tot=[]    
    idx_tot=[];
    shapes_tot=[]    
    id_patch_tot=[]
    
    count=0  
    patch_id=0

    print 'Transforming patches into full matrix'
    
    for idx_,shapes,A,b,C,f,S,bl,c1,neurons_sn,g,sn,_ in file_res:
    
        sn_tot[idx_]=sn
        b_tot.append(b)
        f_tot.append(f)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)
        
        for ii in range(np.shape(A)[-1]):            
            new_comp=A.tocsc()[:,ii]/np.sqrt(np.sum(np.array(A.tocsc()[:,ii].todense())**2))
            if new_comp.sum()>0:
                A_tot[idx_,count]=new_comp
                C_tot[count,:]=C[ii,:]   
                id_patch_tot.append(patch_id)
                count+=1
        
        patch_id+=1      

    A_tot=A_tot[:,:count]
    C_tot=C_tot[:count,:]  
    
    optional_outputs=dict()
    optional_outputs['b_tot']=b_tot
    optional_outputs['f_tot']=f_tot
    optional_outputs['bl_tot']=bl_tot
    optional_outputs['c1_tot']=c1_tot
    optional_outputs['neurons_sn_tot']=neurons_sn_tot
    optional_outputs['g_tot']=g_tot
    optional_outputs['idx_tot']=idx_tot
    optional_outputs['shapes_tot']=shapes_tot
    optional_outputs['id_patch_tot']= id_patch_tot
    
    return A_tot,C_tot,sn_tot, optional_outputs
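# The reassembly loop above L2-normalizes each patch component and writes it into
# the full field-of-view matrix at that patch's pixel indices. A toy sketch of the
# stitching step; the patch results, shapes and indices below are made up.
import numpy as np
import scipy.sparse

d = 100                                              # pixels in the full field of view
patch_results = [
    (np.array([0, 1, 2, 3]), np.random.rand(4, 2)),  # (pixel indices, spatial components)
    (np.array([50, 51, 52]), np.random.rand(3, 1)),
]

cols = []
for idx_, A in patch_results:
    for ii in range(A.shape[1]):
        comp = A[:, ii]
        nrm = np.sqrt(np.sum(comp ** 2))
        if nrm > 0:
            col = np.zeros((d, 1))
            col[idx_, 0] = comp / nrm                # place the normalized component in the full FOV
            cols.append(scipy.sparse.csc_matrix(col))

A_tot = scipy.sparse.hstack(cols).tocsc()
print(A_tot.shape)                                   # (100, 3)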