def myentropy(nn_model, weightlist, xdata, returnallp=False):
    '''
    Binary predictive entropy of the class-1 probability averaged over a set
    of weight samples.

    Usage:
        for NN_Dropout, use the same weights, duplicated N times
        for MFVI, pass the sampled weights

    nn_model: either a single model exposing `D` and `forward(w, xdata)`, or a
        list of models (one per entry of `weightlist`) exposing
        `forward(w, xdata)`.
    weightlist: sequence of weight vectors; its length is the number of
        forward passes that are averaged.
    xdata: array whose second axis indexes the evaluated points
        (`xdata.shape[1]` points).
    returnallp (bool): also return the per-sample probabilities.
    -------------------------------------------------------------------
    returns: (p1narraym, Hpredcheck), or (p1narray, p1narraym, Hpredcheck)
        when `returnallp` is True. `p1narray` is NWeightSamples x NPoints,
        `p1narraym` its mean over samples and `Hpredcheck` the entropy of
        that mean (natural log), set to 0 where every sample is exactly 0 or
        every sample is exactly 1.
    '''
    n_points = xdata.shape[1]
    p1narray = np.zeros((len(weightlist), n_points))  # NWeightSamples x NPoints

    if isinstance(nn_model, list):
        # One (deterministic) model per weight sample.
        for i, nn in enumerate(nn_model):
            p1narray[i, :] = nn.forward(weightlist[i], xdata)
    else:
        # A single model evaluated once per weight sample; assumes
        # `forward` is dropout-like and yields a different output per call.
        for i, w in enumerate(weightlist):
            w = np.reshape(w, (1, nn_model.D))
            p1narray[i, :] = nn_model.forward(w, xdata)

    # Points on which all samples agree with full certainty have zero entropy.
    certainpts = np.logical_or(np.all(p1narray == 0, axis=0),
                               np.all(p1narray == 1, axis=0))
    p1narraym = np.mean(p1narray, axis=0)
    p2narraym = np.mean(1 - p1narray, axis=0)

    # Evaluate the entropy only where it is not known to be zero, so that
    # 0*log(0) is never computed (no spurious RuntimeWarning / NaN), while
    # genuinely invalid probabilities elsewhere still warn as before.
    Hpredcheck = np.zeros(n_points)
    uncertain = ~certainpts
    p1u = p1narraym[uncertain]
    p2u = p2narraym[uncertain]
    Hpredcheck[uncertain] = -p1u*np.log(p1u) - p2u*np.log(p2u)

    if returnallp:
        return p1narray, p1narraym, Hpredcheck
    return p1narraym, Hpredcheck
def test_masking(self):
    """Check the masks returned by ``cg.get_masks(20, 3)``.

    Every mask must sum to at most 3, the union of all masks must cover all
    20 positions, and XOR-toggling every mask onto an all-True vector must
    leave it all False.
    """
    mask_list = cg.get_masks(20, 3)
    largest = np.max([np.sum(mask) for mask in mask_list])
    self.assertTrue(largest <= 3)

    covered = np.zeros(20, dtype=bool)
    remaining = np.ones(20, dtype=bool)
    for mask in mask_list:
        covered = np.logical_or(covered, mask)
        remaining = np.logical_xor(remaining, mask)

    self.assertTrue(np.all(covered))
    self.assertTrue(~np.any(remaining))
def find_AR(bayes_post, subj_post, prior, randomize = False, clip = (-1000, 1000)):
    """Compute the ratio of subjective to Bayesian log-likelihood ratios.

    bayes_post, subj_post: arrays of posterior probabilities; both are
        clipped to [1e-8, 1 - 1e-8] before taking log-odds.
    prior: prior probability (array of the same shape, or a scalar).
    randomize (bool): if True, flip each observation to the complementary
        event (p -> 1 - p for both posteriors and the prior) with
        probability 0.5, drawn from ``np.random.binomial``.
    clip: pair (low, high); ratios strictly inside this open interval are
        flagged True in the returned mask.

    Returns (clip_mask, 1.0 - prior, ARs):
        clip_mask: boolean array, True where low < AR < high.
        1.0 - prior: complement of the (possibly randomized) prior.
        ARs: SLLR / BLLR, defined as 1.0 wherever BLLR is exactly 0.
    """
    bayes_post = np.clip(bayes_post, 0.00000001, 0.99999999)
    subj_post = np.clip(subj_post, 0.00000001, 0.99999999)

    if randomize:
        which_urn = np.random.binomial(1, 0.5, bayes_post.shape)
        keep = which_urn == 1
        # Same selection as which_urn*p + (1 - which_urn)*(1 - p), written so
        # that a scalar prior broadcasts instead of building a ragged list.
        bayes_post = np.where(keep, bayes_post, 1.0 - bayes_post)
        subj_post = np.where(keep, subj_post, 1.0 - subj_post)
        prior = np.where(keep, prior, 1.0 - prior)

    prior_log_odds = np.log(prior/(1.0 - prior))
    BLLR = np.log(bayes_post/(1.0 - bayes_post)) - prior_log_odds
    SLLR = np.log(subj_post/(1.0 - subj_post)) - prior_log_odds

    # The ratio is undefined where the Bayesian update is nil; use 1.0 there.
    exclusion = BLLR == 0.0
    ARs = np.empty(BLLR.shape)
    ARs[exclusion] = 1.0
    ARs[~exclusion] = SLLR[~exclusion]/BLLR[~exclusion]

    # Previously `logical_or(ARs > clip[0], ARs > clip[1])`, which reduces to
    # `ARs > clip[0]` and silently ignored the upper bound. Keep the same
    # polarity (True = above the lower bound) and enforce the upper bound too.
    clip_mask = np.logical_and(ARs > clip[0], ARs < clip[1])
    return clip_mask, 1.0 - prior, ARs
def compute_nj(y, var_distrib):
    ''' Compute nj for each variable y_j

    y (numobs x p DataFrame): The original data (accessed through
        `.columns` and `.iloc`)
    var_distrib (p 1darray): The type of each variable in the data, one of
        'bernoulli', 'binomial', 'ordinal', 'categorical', 'continuous'
    -------------------------------------------------------------------
    returns (tuple of four 1d arrays): nj for all the variables (column
        maximum for bernoulli/binomial, number of distinct values for
        ordinal/categorical, inf for continuous), then the bernoulli/binomial
        entries only, the ordinal entries only and the categorical entries
        only
    raises ValueError: if a variable type is none of the above
    '''
    all_nj, bin_nj, ord_nj, categ_nj = [], [], [], []
    # Ordinal and categorical variables are handled identically, only the
    # per-type output list differs.
    cardinality_targets = {'ordinal': ord_nj, 'categorical': categ_nj}

    n_vars = y.shape[1]
    for col in range(n_vars):
        kind = var_distrib[col]
        if kind in ('bernoulli', 'binomial'):
            upper = int(np.max(y.iloc[:, col], axis=0))
            all_nj.append(upper)
            bin_nj.append(upper)
            continue
        if kind in cardinality_targets:
            n_levels = len(np.unique(y.iloc[:, col]))
            all_nj.append(n_levels)
            cardinality_targets[kind].append(n_levels)
            continue
        if kind == 'continuous':
            all_nj.append(np.inf)
            continue
        raise ValueError('Data type', var_distrib[col], 'is illegal')

    return (np.array(all_nj), np.array(bin_nj),
            np.array(ord_nj), np.array(categ_nj))
def fit_weights_and_save(weights_file,ca_data_file='rs_vm_denoise_200605.npy',opto_silencing_data_file='vip_halo_data_for_sim.npy',opto_activation_data_file='vip_chrimson_data_for_sim.npy',constrain_wts=None,allow_var=True,fit_s02=True,constrain_isn=True,tv=False,l2_penalty=0.01,init_noise=0.1,init_W_from_lsq=False,scale_init_by=1,init_W_from_file=False,init_file=None,correct_Eta=False,init_Eta_with_s02=False,init_Eta12_with_dYY=False,use_opto_transforms=False):
    '''
    Prepare the data, bounds and initial guess for the network fit, run
    `calnet.fitting_spatial_feature_opto_nonlinear.fit_W_sim` once, and save
    the fitted parameters to `weights_file`.

    Steps, in order:
      1. load `rs` from `ca_data_file`, normalize each array to unit row sum,
         average over directions, smooth with a 2x2 box kernel;
      2. build lower/upper bounds for every parameter from integer "bound
         codes";
      3. build `Xhat`/`Yhat` (max-normalized means) and their leading
         principal components;
      4. load the two opto data files, fit an opto transform to each and
         derive `dYY` (transformed minus original `YYhat`);
      5. build the loss weights `wt_dict` and the initial parameter list
         `W0list`, call `fit_W_sim`, and save the result.

    Side effects: prints progress, writes figures under 'figures/', saves
    'XXYYhat.npy' in the working directory, saves `dYY` to a hard-coded
    absolute path, and saves the result dict to `weights_file`.

    Relies on names not defined in this function: np, ssi, plt, sca, ut,
    invert_f_mt, invert_f_mt_with_s02, initialize_W.

    weights_file: path passed to np.save for the result dict
    ca_data_file: np.load-able pickled dict with key 'rs'
    opto_silencing_data_file, opto_activation_data_file: np.load-able pickled
        dicts with keys 'Yhat_opto' and 'h_opto'
    constrain_wts: optional iterable of index pairs; the matching entries of
        Wmy and Wsy are constrained to 0
    allow_var: if False, Wsx, Wsy and Xi are constrained to 0
    fit_s02: if True s02 is bounded to [0,inf), otherwise fixed at 1
    constrain_isn: seeds four entries of the initial Wmy and is forwarded to
        fit_W_sim
    tv, l2_penalty, use_opto_transforms: forwarded to fit_W_sim
    init_noise: scale of the random initial values and of the added noise
    init_W_from_lsq, scale_init_by: initialize Wmx, Wmy with `initialize_W`
    init_W_from_file, init_file: load the initial list from
        `init_file`['as_list']
    correct_Eta, init_Eta_with_s02, init_Eta12_with_dYY: adjustments of the
        loaded initial values (see the NOTE(review) on nesting below)

    NOTE(review): this layout was reconstructed from a whitespace-collapsed
    source; block nesting and a few comment/code boundaries are flagged
    inline and should be confirmed against the original file.
    '''

    nsize,ncontrast = 6,6

    # NOTE(review): allow_pickle=True executes pickled objects; only load trusted files.
    npfile = np.load(ca_data_file,allow_pickle=True)[()]#,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True)
    rs = npfile['rs']
    #rs_denoise = npfile['rs_denoise']

    nsize,ncontrast,ndir = 6,6,8
    #ori_dirs = [[0,4],[2,6]] #[[0,4],[1,3,5,7],[2,6]]
    ori_dirs = [[0,1,2,3,4,5,6,7]]
    nT = len(ori_dirs)
    nS = len(rs[0])

    def sum_to_1(r):
        # flatten all trailing axes and scale each row to unit nan-sum
        R = r.reshape((r.shape[0],-1))
        #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        R = R/np.nansum(R,axis=1)[:,np.newaxis] # changed 8/28
        return R

    def norm_to_mean(r):
        # flatten all trailing axes and scale each row by its mean over the
        # columns that contain no NaN (not called below)
        R = r.reshape((r.shape[0],-1))
        R = R/np.nanmean(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        return R

    Rs = [[None,None] for i in range(len(rs))]
    Rso = [[[None for iT in range(nT)] for iS in range(nS)] for icelltype in range(len(rs))]
    # rso is allocated but not used below
    rso = [[[None for iT in range(nT)] for iS in range(nS)] for icelltype in range(len(rs))]

    for iR,r in enumerate(rs):#rs_denoise):
        print(iR)
        for ialign in range(nS):
            #Rs[iR][ialign] = r[ialign][:,:nsize,:]
            #sm = np.nanmean(np.nansum(np.nansum(Rs[iR][ialign],1),1))
            #Rs[iR][ialign] = Rs[iR][ialign]/sm
            Rs[iR][ialign] = sum_to_1(r[ialign][:,:nsize,:])
    #         Rs[iR][ialign] = von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir)))

    # 2x2 box-average kernel applied over the (size, contrast) axes
    kernel = np.ones((1,2,2))
    kernel = kernel/kernel.sum()

    for iR,r in enumerate(rs):
        for ialign in range(nS):
            for iori in range(nT):
                # average over the directions listed in ori_dirs[iori]
                Rso[iR][ialign][iori] = np.nanmean(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]],-1)
                Rso[iR][ialign][iori][:,:,0] = np.nanmean(Rso[iR][ialign][iori][:,:,0],1)[:,np.newaxis] # average 0 contrast values
                # 'valid' convolution shrinks each smoothed axis by one; the result fills [1:,1:]
                Rso[iR][ialign][iori][:,1:,1:] = ssi.convolve(Rso[iR][ialign][iori],kernel,'valid')
                Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(Rso[iR][ialign][iori].shape[0],-1)
                #Rso[iR][ialign][iori] = Rso[iR][ialign][iori]/np.nanmean(Rso[iR][ialign][iori],-1)[:,np.newaxis]

    def set_bound(bd,code,val=0):
        # in place: for each array in `bd`, assign `val` to the entries
        # selected by the matching boolean mask in `code`
        for iitem in range(len(bd)):
            bd[iitem][code[iitem]] = val

    # hard-coded dimensions (these overwrite the nS and nT derived from the data above)
    nN = 36
    nS = 2
    nP = 2
    nT = 1
    nQ = 4

    # code for bounds: 0 , constrained to 0
    # +/-1 , constrained to +/-1
    # 1.5, constrained to [0,1]
    # 2 , constrained to [0,inf)
    # -2 , constrained to (-inf,0]
    # 3 , unconstrained

    Wmx_bounds = 3*np.ones((nP,nQ),dtype=int)
    Wmx_bounds[0,1] = 0 # SSTs don't receive L4 input

    if allow_var:
        Wsx_bounds = 3*np.ones(Wmx_bounds.shape) #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
        Wsx_bounds[0,1] = 0
    else:
        Wsx_bounds = np.zeros(Wmx_bounds.shape) #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)

    Wmy_bounds = 3*np.ones((nQ,nQ),dtype=int)
    Wmy_bounds[0,:] = 2 # PCs are excitatory
    Wmy_bounds[1:,:] = -2 # all the cell types except PCs are inhibitory
    Wmy_bounds[1,1] = 0 # SSTs don't inhibit themselves
    # Wmy_bounds[3,1] = 0 # PVs are allowed to inhibit SSTs, consistent with Hillel's unpublished results, but not consistent with Pfeffer et al.
    Wmy_bounds[2,0] = 0 # VIPs don't inhibit L2/3 PCs. According to Pfeffer et al., only L5 PCs were found to get VIP inhibition

    if allow_var:
        Wsy_bounds = 3*np.ones(Wmy_bounds.shape) #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)
        Wsy_bounds[1,1] = 0
        Wsy_bounds[3,1] = 0
        Wsy_bounds[2,0] = 0
    else:
        Wsy_bounds = np.zeros(Wmy_bounds.shape) #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)

    # caller-requested connections are forced to zero in both Wmy and Wsy
    if not constrain_wts is None:
        for wt in constrain_wts:
            Wmy_bounds[wt[0],wt[1]] = 0
            Wsy_bounds[wt[0],wt[1]] = 0

    def tile_nS_nT_nN(kernel):
        # repeat `kernel` nS*nT times along a row, then stack nN identical rows
        row = np.concatenate([kernel for idim in range(nS*nT)],axis=0)[np.newaxis,:]
        tiled = np.concatenate([row for irow in range(nN)],axis=0)
        return tiled

    if fit_s02:
        s02_bounds = 2*np.ones((nQ,)) # permitting noise as a free parameter
    else:
        s02_bounds = np.ones((nQ,))

    k_bounds = 1.5*np.ones((nQ*(nS-1),))

    kappa_bounds = np.ones((1,))
    # kappa_bounds = 2*np.ones((1,))

    T_bounds = 1.5*np.ones((nQ*(nT-1),))

    X_bounds = tile_nS_nT_nN(np.array([2,1]))
    # X_bounds = np.array([np.array([2,1,2,1])]*nN)

    Xp_bounds = tile_nS_nT_nN(np.array([3,1]))
    # Xp_bounds = np.array([np.array([3,1,3,1])]*nN)

    # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,)))
    # # Y_bounds = 2*np.ones((nN,nT*nS*nQ))

    Eta_bounds = tile_nS_nT_nN(3*np.ones((nQ,)))
    # Eta_bounds = 3*np.ones((nN,nT*nS*nQ))

    if allow_var:
        Xi_bounds = tile_nS_nT_nN(3*np.ones((nQ,)))
    else:
        Xi_bounds = tile_nS_nT_nN(np.zeros((nQ,)))

    # Xi_bounds = 3*np.ones((nN,nT*nS*nQ))

    h1_bounds = -2*np.ones((1,))

    h2_bounds = 2*np.ones((1,))

    # In[8]:

    # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)]
    shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ*(nS-1),),(1,),(nQ*(nT-1),),(nN,nT*nS*nP),(nN,nT*nS*nP),(nN,nT*nS*nQ),(nN,nT*nS*nQ),(1,),(1,),(nN,nT*nS*nQ),(nN,nT*nS*nQ)]
    print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))
    # Wmx, Wmy, Wsx, Wsy, s02, k, kappa,T, XX, XXp, Eta, Xi, h1, h2, Eta1, Eta2

    # start unbounded, then tighten according to the bound codes
    lb = [-np.inf*np.ones(shp) for shp in shapes]
    ub = [np.inf*np.ones(shp) for shp in shapes]
    bdlist = [Wmx_bounds,Wmy_bounds,Wsx_bounds,Wsy_bounds,s02_bounds,k_bounds,kappa_bounds,T_bounds,X_bounds,Xp_bounds,Eta_bounds,Xi_bounds,h1_bounds,h2_bounds,Eta_bounds,Eta_bounds]

    set_bound(lb,[bd==0 for bd in bdlist],val=0)
    set_bound(ub,[bd==0 for bd in bdlist],val=0)

    set_bound(lb,[bd==2 for bd in bdlist],val=0)

    set_bound(ub,[bd==-2 for bd in bdlist],val=0)

    set_bound(lb,[bd==1 for bd in bdlist],val=1)
    set_bound(ub,[bd==1 for bd in bdlist],val=1)

    set_bound(lb,[bd==1.5 for bd in bdlist],val=0)
    set_bound(ub,[bd==1.5 for bd in bdlist],val=1)

    set_bound(lb,[bd==-1 for bd in bdlist],val=-1)
    set_bound(ub,[bd==-1 for bd in bdlist],val=-1)

    # for bd in [lb,ub]:
    #     for ind in [2,3]:
    #         bd[ind][:,1] = 0

    # temporary for no variation expt.
    # lb[2] = np.zeros_like(lb[2])
    # lb[3] = np.zeros_like(lb[3])
    # lb[4] = np.ones_like(lb[4])
    # lb[5] = np.zeros_like(lb[5])
    # ub[2] = np.zeros_like(ub[2])
    # ub[3] = np.zeros_like(ub[3])
    # ub[4] = np.ones_like(ub[4])
    # ub[5] = np.ones_like(ub[5])
    # temporary for no variation expt.
    # flatten to one (lower, upper) pair per scalar parameter
    lb = np.concatenate([a.flatten() for a in lb])
    ub = np.concatenate([b.flatten() for b in ub])
    bounds = [(a,b) for a,b in zip(lb,ub)]

    # In[10]:

    nS = 2
    print('nT: '+str(nT))
    ndims = 5
    ncelltypes = 5
    Yhat = [[None for iT in range(nT)] for iS in range(nS)]
    Xhat = [[None for iT in range(nT)] for iS in range(nS)]
    Ypc_list = [[None for iT in range(nT)] for iS in range(nS)]
    Xpc_list = [[None for iT in range(nT)] for iS in range(nS)]
    mx = [None for iS in range(nS)]
    for iS in range(nS):
        # mx[iS][icelltype]: maximum of the mean response, used as normalizer
        mx[iS] = np.zeros((ncelltypes,))
        yy = [None for icelltype in range(ncelltypes)]
        for icelltype in range(ncelltypes):
            yy[icelltype] = np.nanmean(Rso[icelltype][iS][0],0)
            mx[iS][icelltype] = np.nanmax(yy[icelltype])
        for iT in range(nT):
            # cell types 1..ncelltypes-1 form the columns of Yhat
            y = [np.nanmean(Rso[icelltype][iS][iT],axis=0)[:,np.newaxis]/mx[iS][icelltype] for icelltype in range(1,ncelltypes)]
            Ypc_list[iS][iT] = [None for icelltype in range(1,ncelltypes)]
            for icelltype in range(1,ncelltypes):
                rss = Rso[icelltype][iS][iT].copy()#/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1)
                #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1)
                rss = rss[np.isnan(rss).sum(1)==0] # keep rows without NaN
    #             print(rss.max())
    #             rss[rss<0] = 0
    #             rss = rss[np.random.randn(rss.shape[0])>0]
                try:
                    # leading `ndims` singular values / right singular vectors of the centered rows
                    u,s,v = np.linalg.svd(rss-np.mean(rss,0)[np.newaxis])
                    Ypc_list[iS][iT][icelltype-1] = [(s[idim],v[idim]) for idim in range(ndims)]
    #                 print('yep on Y')
    #                 print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
                # NOTE(review): bare except swallows every error and leaves
                # Ypc_list[iS][iT][icelltype-1] as None; consider narrowing.
                except:
    #                 print('nope on Y')
                    print(np.mean(np.isnan(rss)))
                    print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
            Yhat[iS][iT] = np.concatenate(y,axis=1)
    #         x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis]
            # cell type 0 provides the input column of Xhat
            icelltype = 0
            #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype]
            x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]/mx[iS][icelltype]
    #         opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis]
            # second column of Xhat is a constant 1
            Xhat[iS][iT] = np.concatenate((x,np.ones_like(x)),axis=1)
    #         Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1)
            icelltype = 0
            #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype]
            rss = Rso[icelltype][iS][iT].copy()
            rss = rss[np.isnan(rss).sum(1)==0]
    #         try:
            u,s,v = np.linalg.svd(rss-rss.mean(0)[np.newaxis])
            Xpc_list[iS][iT] = [None for iinput in range(2)]
            Xpc_list[iS][iT][0] = [(s[idim],v[idim]) for idim in range(ndims)]
            # the constant input gets zero-valued components
            Xpc_list[iS][iT][1] = [(0,np.zeros((Xhat[0][0].shape[0],))) for idim in range(ndims)]
    #         except:
    #             print('nope on X')
    #             print(np.mean(np.isnan(rss)))
    #             print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1)))
    # from here on nN, nP, nQ follow the shapes of the data actually built
    nN,nP = Xhat[0][0].shape
    print('nP: '+str(nP))
    nQ = Yhat[0][0].shape[1]

    # In[11]:

    # These helpers reference `sim_utils`, which is imported further down in
    # this function; they must not be called before that import has run.
    def compute_f_(Eta,Xi,s02):
        return sim_utils.f_miller_troyer(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))
    def compute_fprime_m_(Eta,Xi,s02):
        return sim_utils.fprime_miller_troyer(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))*Xi
    def compute_fprime_s_(Eta,Xi,s02):
        # NOTE(review): hard-codes two copies of s02 where the siblings tile
        # nS*nT copies; the two agree only while nS*nT == 2.
        s2 = Xi**2+np.concatenate((s02,s02),axis=0)
        return sim_utils.fprime_s_miller_troyer(Eta,s2)*(Xi/s2)
    def sorted_r_eigs(w):
        # eigenvalues in ascending argsort order, with matching eigenvector columns
        drW,prW = np.linalg.eig(w)
        srtinds = np.argsort(drW)
        return drW[srtinds],prW[:,srtinds]

    # In[12]:

    # 0.Wmx, 1.Wmy, 2.Wsx, 3.Wsy, 4.s02,5.K, 6.kappa,7.T,8.XX, 9.XXp, 10.Eta, 11.Xi, 12.h1, 13.h2, 14.Eta1, 15.Eta2
    shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ*(nS-1),),(1,),(nQ*(nT-1),),(nN,nT*nS*nP),(nN,nT*nS*nP),(nN,nT*nS*nQ),(nN,nT*nS*nQ),(1,),(1,),(nN,nT*nS*nQ),(nN,nT*nS*nQ)]
    print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))

    import calnet.fitting_spatial_feature
    import sim_utils

    YYhat = calnet.utils.flatten_nested_list_of_2d_arrays(Yhat)
    XXhat = calnet.utils.flatten_nested_list_of_2d_arrays(Xhat)

    # ---- first opto data set (opto_silencing_data_file) ----
    opto_dict = np.load(opto_silencing_data_file,allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    # average over axis 3 of the (nN,2,nS,2,nQ) view, then normalize each
    # column by the maximum of the even rows
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto,(nN,2,nS,2,nQ)),3).reshape((nN*2,-1))
    Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto'] # read but not used below
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_halo = Yhat_opto.reshape((nN,2,-1))
    opto_transform1 = calnet.utils.fit_opto_transform(YYhat_halo)

    # zero the residual term for these columns
    opto_transform1.res[:,[0,2,3,4,6,7]] = 0

    dYY1 = opto_transform1.transform(YYhat) - YYhat
    #YYhat_halo_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_halo)
    #dYY1 = YYhat_halo_sim[:,1,:] - YYhat_halo_sim[:,0,:]

    def overwrite_plus_n(arr,to_overwrite,n):
        # in place: copy column to_overwrite+n over column to_overwrite
        arr[:,to_overwrite] = arr[:,int(to_overwrite+n)]
        return arr

    # columns 1,2 take the values of columns 5,6; column 7 takes column 3
    for to_overwrite in [1,2]:
        n = 4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]
    for to_overwrite in [7]:
        n = -4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]
    #for to_overwrite in [1,2]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+4]
    #for to_overwrite in [7]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite-4]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]
    #for to_overwrite in [1,2,5,6]: # overwrite sst and vip with off-centered values
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+8]
    #for to_overwrite in [11,15]:
    #    dYY1[:,to_overwrite] = np.nan #dYY1[:,to_overwrite-8]

    # ---- second opto data set (opto_activation_data_file) ----
    opto_dict = np.load(opto_activation_data_file,allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto,(nN,2,nS,2,nQ)),3).reshape((nN*2,-1))
    Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_chrimson = Yhat_opto.reshape((nN,2,-1))
    opto_transform2 = calnet.utils.fit_opto_transform(YYhat_chrimson)

    dYY2 = opto_transform2.transform(YYhat) - YYhat
    #YYhat_chrimson_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_chrimson)
    #dYY2 = YYhat_chrimson_sim[:,1,:] - YYhat_chrimson_sim[:,0,:]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    print('dYY1 mean: %03f'%np.nanmean(np.abs(dYY1)))
    print('dYY2 mean: %03f'%np.nanmean(np.abs(dYY2)))

    # rows [:nN] hold the first (silencing-file) effect, rows [nN:] the second
    dYY = np.concatenate((dYY1,dYY2),axis=0)

    # diagnostic scatter plots of YYhat vs YYhat+dYY, one figure per column 0..3
    titles = ['VIP silencing','VIP activation']
    for itype in [0,1,2,3]:
        plt.figure(figsize=(5,2.5))
        for iyy,dyy in enumerate([dYY1,dYY2]):
            plt.subplot(1,2,iyy+1)
            if np.sum(np.isnan(dyy[:,itype]))==0:
                sca.scatter_size_contrast(YYhat[:,itype],YYhat[:,itype]+dyy[:,itype],nsize=6,ncontrast=6)#,mn=0)
            plt.title(titles[iyy])
            plt.xlabel('cell type %d event rate, \n light off'%itype)
            plt.ylabel('cell type %d event rate, \n light on'%itype)
            ut.erase_top_right()
        plt.tight_layout()
        ut.mkdir('figures')
        plt.savefig('figures/scatter_light_on_light_off_target_celltype_%d.eps'%itype)

    # True where an opto effect is available (not NaN)
    opto_mask = ~np.isnan(dYY)

    #dYY[nN:][~opto_mask[nN:]] = -dYY[:nN][~opto_mask[nN:]]

    print('mean of opto_mask: '+str(opto_mask.mean()))

    #dYY[~opto_mask] = 0
    def zero_nans(arr):
        # in place: replace NaN entries with 0
        arr[np.isnan(arr)] = 0
        return arr
    #dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,
    #        opto_transform2.slope,opto_transform2.intercept,opto_transform2.res
    #        = [zero_nans(x) for x in
    #                [dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,
    #                opto_transform2.slope,opto_transform2.intercept,opto_transform2.res]]
    dYY = zero_nans(dYY)

    # where transform 2 has a NaN slope or intercept (first row), fill it with
    # the algebraic inverse of transform 1
    to_adjust = np.logical_or(np.isnan(opto_transform2.slope[0]),np.isnan(opto_transform2.intercept[0]))

    opto_transform2.slope[:,to_adjust] = 1/opto_transform1.slope[:,to_adjust]
    opto_transform2.intercept[:,to_adjust] = -opto_transform1.intercept[:,to_adjust]/opto_transform1.slope[:,to_adjust]
    opto_transform2.res[:,to_adjust] = -opto_transform1.res[:,to_adjust]/opto_transform1.slope[:,to_adjust]

    # NOTE(review): hard-coded, user-specific absolute path; this fails on any
    # machine where the directory does not exist.
    np.save('/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy',dYY)

    from importlib import reload
    reload(calnet)
    #reload(calnet.fitting_spatial_feature_opto_nonlinear)
    reload(sim_utils)
    # reload(calnet.fitting_spatial_feature)
    # W0list = [np.ones(shp) for shp in shapes]
    # weights of the individual loss terms, forwarded to fit_W_sim
    wt_dict = {}
    wt_dict['X'] = 1
    wt_dict['Y'] = 15
    wt_dict['Eta'] = 10 # 1 #
    # NOTE(review): the collapsed source does not show whether the 'Xi' and
    # 'barrier' assignments are live or commented out; kept live — confirm.
    wt_dict['Xi'] = 0.1
    wt_dict['stims'] = np.ones((nN,1)) #(np.arange(30)/30)[:,np.newaxis]**1 #
    wt_dict['barrier'] = 0. #30.0 #0.1
    wt_dict['opto'] = 1e-1#1e1
    wt_dict['isn'] = 3
    wt_dict['tv'] = 1
    wt_dict['stimsOpto'] = 0.6*np.ones((nN,1))
    wt_dict['stimsOpto'][0::6] = 3
    wt_dict['celltypesOpto'] = 0.67*np.ones((1,nQ*nS*nT))
    wt_dict['celltypesOpto'][0,0::nQ] = 2
    wt_dict['dirOpto'] = np.array((1,0.5))
    wt_dict['dYY'] = 1#1000
    wt_dict['Eta12'] = 1
    wt_dict['coupling'] = 1

    np.save('XXYYhat.npy',{'YYhat':YYhat,'XXhat':XXhat,'rs':rs,'Rs':Rs,'Rso':Rso,'Ypc_list':Ypc_list,'Xpc_list':Xpc_list})
    # initial Eta values: without opto effect, and with each of the two effects added
    Eta0 = invert_f_mt(YYhat)
    Eta10 = invert_f_mt(YYhat + dYY[:nN])
    Eta20 = invert_f_mt(YYhat + dYY[nN:])
    print('mean Eta1 diff: '+str(np.mean(np.abs(Eta0-Eta10))))
    print('mean Eta2 diff: '+str(np.mean(np.abs(Eta0-Eta20))))

    ntries = 1
    nhyper = 1
    dt = 1e-1
    niter = int(np.round(10/dt)) #int(1e4)
    perturbation_size = 5e-2
    # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5
    #l2_penalty = 0.1
    Wt = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    loss = np.zeros((nhyper,ntries))
    # parameters whose upper bound is 0 get a sign-flipped random initial value
    is_neg = np.array([b[1] for b in bounds])==0
    counter = 0
    negatize = [np.zeros(shp,dtype='bool') for shp in shapes]
    print(shapes)
    for ishp,shp in enumerate(shapes):
        nel = np.prod(shp)
        negatize[ishp][:][is_neg[counter:counter+nel].reshape(shp)] = True
        counter = counter + nel
    for ihyper in range(nhyper):
        for itry in range(ntries):
            print((ihyper,itry))
            W0list = [init_noise*(ihyper+1)*np.random.rand(*shp) for shp in shapes]
            print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))
            print('size of w0: '+str(np.sum([np.size(x) for x in W0list])))
            print('len(W0list) : '+str(len(W0list)))
            counter = 0
            for ishp,shp in enumerate(shapes):
                W0list[ishp][negatize[ishp]] = -W0list[ishp][negatize[ishp]]
            # NOTE(review): index 4 is sized with shapes[5], not shapes[4];
            # both equal (nQ,) only while nS == 2.
            W0list[4] = np.ones(shapes[5]) # s02
            W0list[5] = np.ones(shapes[5]) # K
            W0list[6] = np.ones(shapes[6]) # kappa
            W0list[7] = np.ones(shapes[7]) # T
            W0list[8] = np.concatenate(Xhat,axis=1) #XX
            W0list[9] = np.zeros_like(W0list[8]) #XXp
            W0list[10] = Eta0.copy() #np.zeros(shapes[10]) #Eta
            W0list[11] = np.zeros(shapes[11]) #Xi
            W0list[14] = Eta10.copy() # Eta1
            W0list[15] = Eta20.copy() # Eta2
            #[Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi]
    #         W0list = Wstar_dict['as_list'].copy()
    #         W0list[1][1,0] = -1.5
    #         W0list[1][3,0] = -1.5
            if init_W_from_lsq:
                W0list[0],W0list[1] = initialize_W(Xhat,Yhat,scale_by=scale_init_by)
                for ivar in range(0,2):
                    W0list[ivar] = W0list[ivar] + init_noise*np.random.randn(*W0list[ivar].shape)
            if constrain_isn:
                W0list[1][0,0] = 3
                W0list[1][0,3] = 5
                W0list[1][3,0] = -5
                W0list[1][3,3] = -5
            #if constrain_isn:
            #    W0list[1][0,0] = 2
            #    W0list[1][0,3] = 2
            #    W0list[1][3,0] = -2
            #    W0list[1][3,3] = -2

            #if wt_dict['coupling'] > 0:
            #    W0list[1][1,0] = -1
            # NOTE(review): everything down to the noise loop is assumed to be
            # nested under `init_W_from_file`; the collapsed source cannot
            # confirm this — verify against the original layout.
            if init_W_from_file:
                npyfile = np.load(init_file,allow_pickle=True)[()]
                W0list = npyfile['as_list']
                # a saved fit with an extra factor-2 axis: average that axis away
                if W0list[8].size == nN*nS*2*nP:
                    W0list[7] = np.array(())
                    W0list[1][1,0] = W0list[1][1,0] # no-op self-assignment
                    W0list[8] = np.nanmean(W0list[8].reshape((nN,nS,2,nP)),2).flatten() #XX
                    W0list[9] = np.nanmean(W0list[9].reshape((nN,nS,2,nP)),2).flatten() #XXp
                    W0list[10] = np.nanmean(W0list[10].reshape((nN,nS,2,nQ)),2).flatten() #Eta
                    W0list[11] = np.nanmean(W0list[11].reshape((nN,nS,2,nQ)),2).flatten() #Xi
                if correct_Eta:
                    W0list[10] = Eta0.copy()
                if len(W0list) < len(shapes):
                    W0list = W0list[:-1] + [np.array(-0.5),np.array(1),Eta10.copy(),Eta20.copy()] # add h1,h2,Eta1,Eta2
                if init_Eta_with_s02:
                    # recompute the Eta initial values using the loaded s02
                    s02 = W0list[4].copy()
                    Eta0 = invert_f_mt_with_s02(YYhat,s02,nS=nS,nT=nT)
                    Eta10 = invert_f_mt_with_s02(YYhat+dYY[:nN],s02,nS=nS,nT=nT)
                    Eta20 = invert_f_mt_with_s02(YYhat+dYY[nN:],s02,nS=nS,nT=nT)
                    W0list[10] = Eta0.copy()
                    W0list[14] = Eta10.copy()
                    W0list[15] = Eta20.copy()
                if init_Eta12_with_dYY:
                    # derive Eta1/Eta2 by applying the opto transforms to the
                    # rates implied by the loaded Eta, Xi and s02
                    Eta0 = W0list[10].copy().reshape((nN,nQ*nS*nT))
                    Xi0 = W0list[11].copy().reshape((nN,nQ*nS*nT))
                    s020 = W0list[4].copy()
                    YY0s = compute_f_(Eta0,Xi0,s020)
                    this_YY1 = opto_transform1.transform(YY0s)
                    this_YY2 = opto_transform2.transform(YY0s)
                    Eta10 = invert_f_mt_with_s02(this_YY1,s020,nS=nS,nT=nT)
                    Eta20 = invert_f_mt_with_s02(this_YY2,s020,nS=nS,nT=nT)
                    W0list[14] = Eta10.copy()
                    W0list[15] = Eta20.copy()

                    YY10s = compute_f_(Eta10,Xi0,s020)
                    YY20s = compute_f_(Eta20,Xi0,s020)
                    titles = ['VIP silencing','VIP activation']
                    for itype in [0,1,2,3]:
                        plt.figure(figsize=(5,2.5))
                        for iyy,yy in enumerate([YY10s,YY20s]):
                            plt.subplot(1,2,iyy+1)
                            if np.sum(np.isnan(yy[:,itype]))==0:
                                sca.scatter_size_contrast(YY0s[:,itype],yy[:,itype],nsize=6,ncontrast=6)#,mn=0)
                            plt.title(titles[iyy])
                            plt.xlabel('cell type %d event rate, \n light off'%itype)
                            plt.ylabel('cell type %d event rate, \n light on'%itype)
                            ut.erase_top_right()
                        plt.tight_layout()
                        ut.mkdir('figures')
                        plt.savefig('figures/scatter_light_on_light_off_init_celltype_%d.eps'%itype)

                #if wt_dict['coupling'] > 0:
                #    W0list[1][1,0] = W0list[1][1,0] - 1
                for ivar in [0,1,4,5]: # Wmx, Wmy, s02, k
                    W0list[ivar] = W0list[ivar] + init_noise*np.random.randn(*W0list[ivar].shape)

            # wt_dict['Xi'] = 10
            # wt_dict['Eta'] = 10

            print('size of bounds: '+str(np.sum([np.size(x) for x in bdlist])))
            print('size of w0: '+str(np.sum([np.size(x) for x in W0list])))
            print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))

            # run the fit; gr, hess and result are returned but not used below
            Wt[ihyper][itry],loss[ihyper][itry],gr,hess,result = calnet.fitting_spatial_feature_opto_nonlinear.fit_W_sim(Xhat,Xpc_list,Yhat,Ypc_list,pop_rate_fn=sim_utils.f_miller_troyer,pop_deriv_fn=sim_utils.fprime_miller_troyer,neuron_rate_fn=sim_utils.evaluate_f_mt,W0list=W0list.copy(),bounds=bounds,niter=niter,wt_dict=wt_dict,l2_penalty=l2_penalty,compute_hessian=False,dt=dt,perturbation_size=perturbation_size,dYY=dYY,constrain_isn=constrain_isn,tv=tv,opto_mask=opto_mask,use_opto_transforms=use_opto_transforms,opto_transform1=opto_transform1,opto_transform2=opto_transform2)

    #         Wt[ihyper][itry] = [w[-1] for w in Wt_temp]
    #         loss[ihyper,itry] = loss_temp[-1]

    def parse_W(W):
        # unpack the 16-element parameter list into named pieces
        Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2 = W
        return Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2

    # only the first (ihyper=0, itry=0) fit is saved
    itry = 0
    Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2 = parse_W(Wt[0][0])

    labels = ['Wmx','Wmy','Wsx','Wsy','s02','K','kappa','T','XX','XXp','Eta','Xi','h1','h2','Eta1','Eta2']
    Wstar_dict = {}
    for i,label in enumerate(labels):
        Wstar_dict[label] = Wt[0][0][i]
    Wstar_dict['as_list'] = [Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2]
    Wstar_dict['loss'] = loss[0][0]
    Wstar_dict['wt_dict'] = wt_dict
    np.save(weights_file,Wstar_dict,allow_pickle=True)
def DDGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50, \ eps = 1E-05, maxstep = 100, seed = None, perform_selec = True): ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM) y (numobs x p ndarray): The observations containing categorical variables n_clusters (int): The number of clusters to look for in the data r (list): The dimension of latent variables through the first 2 layers k (list): The number of components of the latent Gaussian mixture layers init (dict): The initialisation parameters for the algorithm var_distrib (p 1darray): An array containing the types of the variables in y nj (p 1darray): For binary/count data: The maximum values that the variable can take. For ordinal data: the number of different existing categories for each variable it (int): The maximum number of MCEM iterations of the algorithm eps (float): If the likelihood increase by less than eps then the algorithm stops maxstep (int): The maximum number of optimisation step for each variable seed (int): The random state seed to set (Only for numpy generated data for the moment) perform_selec (Bool): Whether to perform architecture selection or not ------------------------------------------------------------------------------------------------ returns (dict): The predicted classes, the likelihood through the EM steps and a continuous representation of the data ''' prev_lik = -1E16 best_lik = -1E16 tol = 0.01 max_patience = 1 patience = 0 best_k = deepcopy(k) best_r = deepcopy(r) best_sil = -1 new_sil = -1 # Initialize the parameters eta = deepcopy(init['eta']) psi = deepcopy(init['psi']) lambda_bin = deepcopy(init['lambda_bin']) lambda_ord = deepcopy(init['lambda_ord']) lambda_categ = deepcopy(init['lambda_categ']) H = deepcopy(init['H']) w_s = deepcopy( init['w_s'] ) # Probability of path s' through the network for all s' in Omega numobs = len(y) likelihood = [] it_num = 0 ratio = 1000 np.random.seed = seed # Dispatch variables between categories y_bin = y[:, 
np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')] nj_bin = nj[np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')].astype(int) nb_bin = len(nj_bin) y_categ = y[:, var_distrib == 'categorical'] nj_categ = nj[var_distrib == 'categorical'].astype(int) nb_categ = len(nj_categ) y_ord = y[:, var_distrib == 'ordinal'] nj_ord = nj[var_distrib == 'ordinal'].astype(int) nb_ord = len(nj_ord) L = len(k) k_aug = k + [1] S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)]) M = M_growth(1, r, numobs) assert nb_ord + nb_bin + nb_categ > 0 # Compute the Gower matrix cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli') dm = gower_matrix(y, cat_features=cat_features) while (it_num < it) & ((ratio > eps) | (patience <= max_patience)): print(it_num) # The clustering layer is the one used to perform the clustering # i.e. the layer l such that k[l] == n_clusters clustering_layer = np.argmax(np.array(k) == n_clusters) ##################################################################################### ################################# S step ############################################ ##################################################################################### #===================================================================== # Draw from f(z^{l} | s, Theta) for all s in Omega #===================================================================== mu_s, sigma_s = compute_path_params(eta, H, psi) sigma_s = ensure_psd(sigma_s) z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M) ''' print('mu_s', np.abs(mu_s[0]).mean()) print('sigma_s', np.abs(sigma_s[0]).mean()) print('z_s0', np.abs(z_s[0]).mean()) print('z_s1', np.abs(z_s[1]).mean(0)[:,0]) ''' #======================================================================== # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1 #======================================================================== chsi = compute_chsi(H, psi, mu_s, sigma_s) chsi = ensure_psd(chsi) rho = 
compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi) # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively z2_z1s = draw_z2_z1s(chsi, rho, M, r) #======================================================================= # Compute the p(y| z1) for all variable categories #======================================================================= py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord, lambda_categ, y_categ, nj_categ, z_s[0]) #======================================================================== # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s #======================================================================== zl1_ys = draw_zl1_ys(z_s, py_zl1, M) ##################################################################################### ################################# E step ############################################ ##################################################################################### #===================================================================== # Compute conditional probabilities used in the appendix of asta paper #===================================================================== pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s, py_zl1) #del(py_zl1) #===================================================================== # Compute p(z^{(l)}| s, y). 
Equation (5) of the paper #===================================================================== pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S) pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s) #===================================================================== # Compute MFA expectations #===================================================================== Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \ E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S) ########################################################################### ############################ M step ####################################### ########################################################################### #======================================================= # Compute MFA Parameters #======================================================= w_s = np.mean(ps_y, axis=0) eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H, k) #======================================================= # Identifiability conditions #======================================================= # Update eta, H and Psi values H = diagonal_cond(H, psi) Ez, AT = compute_z_moments(w_s, eta, H, psi) eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT) del (Ez) #======================================================= # Compute GLLVM Parameters #======================================================= # We optimize each column separately as it is faster than all column jointly # (and more relevant with the independence hypothesis) lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],\ tol = tol, maxstep = maxstep) lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],\ tol = tol, maxstep = maxstep) lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],\ tol = tol, maxstep = maxstep) ########################################################################### ################## 
Clustering parameters updating ######################### ########################################################################### new_lik = np.sum(np.log(p_y)) likelihood.append(new_lik) ratio = (new_lik - prev_lik) / abs(prev_lik) print(likelihood) idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1])) psl_y = ps_y.reshape(numobs, *k, order='C').sum(idx_to_sum) temp_class = np.argmax(psl_y, axis=1) try: new_sil = silhouette_score(dm, temp_class, metric='precomputed') except ValueError: new_sil = -1 print('Silhouette score:', new_sil) if best_sil < new_sil: z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1) best_sil = deepcopy(new_sil) classes = deepcopy(temp_class) fig = plt.figure(figsize=(8, 8)) plt.scatter(z[:, 0], z[:, 1]) plt.show() # Refresh the classes only if they provide a better explanation of the data if best_lik < new_lik: best_lik = deepcopy(prev_lik) if prev_lik < new_lik: patience = 0 M = M_growth(it_num + 2, r, numobs) else: patience += 1 ########################################################################### ######################## Parameter selection ############################# ########################################################################### is_not_min_specif = not (np.all(np.array(k) == n_clusters) & np.array_equal(r, [2, 1])) if look_for_simpler_network( it_num) & perform_selec & is_not_min_specif: r_to_keep = r_select(y_bin, y_ord, y_categ, zl1_ys, z2_z1s, w_s) # If r_l == 0, delete the last l + 1: layers new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1 k_to_keep = k_select(w_s, k, new_L, clustering_layer) is_L_unchanged = L == new_L is_r_unchanged = np.all( [len(r_to_keep[l]) == r[l] for l in range(new_L + 1)]) is_k_unchanged = np.all( [len(k_to_keep[l]) == k[l] for l in range(new_L)]) is_selection = not (is_r_unchanged & is_k_unchanged & is_L_unchanged) assert new_L > 0 if is_selection: eta = [eta[l][k_to_keep[l]] for l in range(new_L)] eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)] H = 
[H[l][k_to_keep[l]] for l in range(new_L)] H = [H[l][:, r_to_keep[l]] for l in range(new_L)] H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)] psi = [psi[l][k_to_keep[l]] for l in range(new_L)] psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)] psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)] if nb_bin > 0: # Add the intercept: bin_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1 ]) lambda_bin = lambda_bin[:, bin_r_to_keep] if nb_ord > 0: # Intercept coefficients handling is a little more complicated here lambda_ord_intercept = [ lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord ] Lambda_ord_var = np.stack( [lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord]) Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]] lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])\ for j in range(nb_ord)] if nb_categ > 0: lambda_categ_intercept = [ lambda_categ[j][:, 0] for j in range(nb_categ) ] Lambda_categ_var = [ lambda_categ_j[:, -r[0]:] for lambda_categ_j in lambda_categ ] Lambda_categ_var = [ lambda_categ_j[:, r_to_keep[0]] for lambda_categ_j in lambda_categ ] lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])\ for j in range(nb_categ)] w = w_s.reshape(*k, order='C') new_k_idx_grid = np.ix_(*k_to_keep[:new_L]) # If layer deletion, sum the last components of the paths if L > new_L: deleted_dims = tuple(range(L)[new_L:]) w_s = w[new_k_idx_grid].sum(deleted_dims).flatten( order='C') else: w_s = w[new_k_idx_grid].flatten(order='C') w_s /= w_s.sum() k = [len(k_to_keep[l]) for l in range(new_L)] r = [len(r_to_keep[l]) for l in range(new_L + 1)] k_aug = k + [1] S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)]) L = new_L patience = 0 best_r = deepcopy(r) best_k = deepcopy(k) # Identifiability conditions H = diagonal_cond(H, psi) Ez, AT = compute_z_moments(w_s, eta, H, psi) eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT) print('New architecture:') print('k', k) print('r', r) 
print('L', L) print('S', S) print("w_s", len(w_s)) prev_lik = deepcopy(new_lik) it_num = it_num + 1 out = dict(likelihood = likelihood, classes = classes, z = z, \ best_r = best_r, best_k = best_k) return (out)
def MDGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50,
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True):
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)
    with a continuous head, a discrete head and a common tail.

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int or str): The number of clusters to look for in the data
                            or the use mode of the MDGMM ('auto' or 'multi')
    r (dict): The dimension of latent variables through the layers
              (keys 'c', 'd' and 't')
    k (dict): The number of components of the latent Gaussian mixture layers
              (keys 'c', 'd' and 't')
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
                    For categorical data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes ('classes'), the likelihood through the
                    EM steps ('likelihood'), a continuous representation of the
                    data ('z') and the last selected architecture
                    ('best_r', 'best_k')
    raises ValueError: if y has no discrete or no continuous variable, or if
                       n_clusters is neither numeric, 'auto' nor 'multi'
    '''

    # Break the reference link
    k = deepcopy(k)
    r = deepcopy(r)

    best_k = deepcopy(k)
    best_r = deepcopy(r)

    # Add other checks for the other variables
    check_inputs(k, r)

    prev_lik = - 1E15
    best_lik = -1E15
    tol = 0.01
    max_patience = 1
    patience = 0

    #====================================================
    # Initialize the parameters
    #====================================================

    eta_c, eta_d, H_c, H_d, psi_c, psi_d = dispatch_dgmm_init(init)
    lambda_bin, lambda_ord, lambda_categ = dispatch_gllvm_init(init)
    w_s_c, w_s_d = dispatch_paths_init(init)

    numobs = len(y)
    likelihood = []
    it_num = 0
    ratio = 1000
    # Call the seeding function: assigning to np.random.seed would replace
    # the function itself and leave the generator unseeded.
    np.random.seed(seed)

    #====================================================
    # Dispatch variables between categories
    #====================================================

    is_bin = np.logical_or(var_distrib == 'bernoulli',
                           var_distrib == 'binomial')
    y_bin = y[:, is_bin]
    nj_bin = nj[is_bin]
    nj_bin = nj_bin.astype(int)
    nb_bin = len(nj_bin)

    y_ord = y[:, var_distrib == 'ordinal']
    nj_ord = nj[var_distrib == 'ordinal']
    nj_ord = nj_ord.astype(int)
    nb_ord = len(nj_ord)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)

    yc = y[:, var_distrib == 'continuous']
    ss = StandardScaler()
    yc = ss.fit_transform(yc)

    nb_cont = yc.shape[1]

    # *_1L stands for quantities going through all the network (head + tail)
    k_1L, L_1L, L, bar_L, S_1L = nb_comps_and_layers(k)
    r_1L = {'c': r['c'] + r['t'], 'd': r['d'] + r['t'], 't': r['t']}

    best_sil = [-1.1 for l in range(L['t'] - 1)] if n_clusters == 'multi' else -1.1
    new_sil = [-1.1 for l in range(L['t'] - 1)] if n_clusters == 'multi' else -1.1

    if n_clusters == 'multi':
        # Best classes found so far for each tail layer. Initialised once so
        # that a later, worse iteration does not erase an earlier best result.
        classes = [[] for l in range(L['t'] - 1)]

    M = M_growth(1, r_1L, numobs)

    if nb_bin + nb_ord + nb_categ == 0: # Create the InputError class and change this
        raise ValueError('Input does not contain discrete variables, '
                         'consider using a regular DGMM')
    if nb_cont == 0: # Create the InputError class and change this
        raise ValueError('Input does not contain continuous values, '
                         'consider using a DDGMM')

    # Compute the Gower matrix
    cat_features = np.logical_or(var_distrib == 'categorical',
                                 var_distrib == 'bernoulli')
    dm = gower_matrix(y, cat_features = cat_features)

    while (it_num < it) & ((ratio > eps) | (patience <= max_patience)):
        print(it_num)

        # The clustering layer is the one used to perform the clustering
        # i.e. the layer l such that k[l] == n_clusters
        if not(isnumeric(n_clusters)):
            if n_clusters == 'auto':
                clustering_layer = 0
            elif n_clusters == 'multi':
                clustering_layer = list(range(L['t'] - 1))
            else:
                raise ValueError('Please enter an int, auto or multi for n_clusters')
        else:
            assert (np.array(k['t']) == n_clusters).any()
            clustering_layer = np.argmax(np.array(k['t']) == n_clusters)

        #####################################################################################
        ################################# MC step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for both heads and tail
        #=====================================================================

        mu_s_c, sigma_s_c = compute_path_params(eta_c, H_c, psi_c)
        sigma_s_c = ensure_psd(sigma_s_c)

        mu_s_d, sigma_s_d = compute_path_params(eta_d, H_d, psi_d)
        sigma_s_d = ensure_psd(sigma_s_d)

        z_s_c, zc_s_c, z_s_d, zc_s_d = draw_z_s_all_network(mu_s_c, sigma_s_c,
                            mu_s_d, sigma_s_d, yc, eta_c, eta_d, S_1L, L, M)

        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================

        # Create wrapper as before and after
        chsi_c = compute_chsi(H_c, psi_c, mu_s_c, sigma_s_c)
        chsi_c = ensure_psd(chsi_c)
        rho_c = compute_rho(eta_c, H_c, psi_c, mu_s_c, sigma_s_c, zc_s_c, chsi_c)

        chsi_d = compute_chsi(H_d, psi_d, mu_s_d, sigma_s_d)
        chsi_d = ensure_psd(chsi_d)
        rho_d = compute_rho(eta_d, H_d, psi_d, mu_s_d, sigma_s_d, zc_s_d, chsi_d)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s_c, z2_z1s_d = draw_z2_z1s_network(chsi_c, chsi_d, rho_c,
                                                 rho_d, M, r_1L, L)

        #=======================================================================
        # Compute the p(y^D| z1) for all discrete variables
        #=======================================================================

        py_zl1_d = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,
                          lambda_categ, y_categ, nj_categ, z_s_d[0])

        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================

        zl1_ys_d = draw_zl1_ys(z_s_d, py_zl1_d, M['d'])

        #####################################################################################
        ################################# E step ############################################
        #####################################################################################

        #=====================================================================
        # Compute quantities necessary for E steps of both heads and tail
        #=====================================================================

        # Discrete head quantities
        pzl1_ys_d, ps_y_d, py_d = E_step_GLLVM(z_s_d[0], mu_s_d[0], sigma_s_d[0],
                                               w_s_d, py_zl1_d)
        py_s_d = ps_y_d * py_d / w_s_d[n_axis]

        # Continuous head quantities
        ps_y_c, py_s_c, py_c = continuous_lik(yc, mu_s_c[0], sigma_s_c[0], w_s_c)

        pz_s_d = fz_s(z_s_d, mu_s_d, sigma_s_d)
        pz_s_c = fz_s(z_s_c, mu_s_c, sigma_s_c)

        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================

        # Compute pz2_z1s_d and pz2_z1s_d for the tail indices whereas it is useless
        pz2_z1s_d = fz2_z1s(t(pzl1_ys_d, (1, 0, 2)), z2_z1s_d, chsi_d, rho_d,
                            S_1L['d'])
        pz_ys_d = fz_ys(t(pzl1_ys_d, (1, 0, 2)), pz2_z1s_d)

        pz2_z1s_c = fz2_z1s([], z2_z1s_c, chsi_c, rho_c, S_1L['c'])
        pz_ys_c = fz_ys([], pz2_z1s_c)

        pz2_z1s_t = fz2_z1s([], z2_z1s_c[bar_L['c']:], chsi_c[bar_L['c']:],
                            rho_c[bar_L['c']:], S_1L['t'])

        # Junction layer computations
        # Compute p(zC |s)
        py_zs_d = fy_zs(pz_ys_d, py_s_d)
        py_zs_c = fy_zs(pz_ys_c, py_s_c)

        # Compute p(zt | yC, yD, sC, SD)
        pzt_yCyDs = fz_yCyDs(py_zs_c, pz_ys_d, py_s_c, M, S_1L, L)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================

        # Discrete head.
        Ez_ys_d, E_z1z2T_ys_d, E_z2z2T_ys_d, EeeT_ys_d = \
            E_step_DGMM_d(zl1_ys_d, H_d, z_s_d, zc_s_d, z2_z1s_d, pz_ys_d,
                          pz2_z1s_d, S_1L['d'], L['d'])

        # Continuous head
        Ez_ys_c, E_z1z2T_ys_c, E_z2z2T_ys_c, EeeT_ys_c = \
            E_step_DGMM_c(H_c, z_s_c, zc_s_c, z2_z1s_c, pz_ys_c,
                          pz2_z1s_c, S_1L['c'], L['c'])

        # Junction layers
        Ez_ys_t, E_z1z2T_ys_t, E_z2z2T_ys_t, EeeT_ys_t = \
            E_step_DGMM_t(H_c[bar_L['c']:],
                          z_s_c[bar_L['c']:], zc_s_c[bar_L['c']:],
                          z2_z1s_c[bar_L['c']:],
                          pzt_yCyDs, pz2_z1s_t, S_1L, L, k_1L)

        # Error here for the first two terms: p(y^h | z^t, s^C) != p(y^h | z^t, s^{1C:L})
        pst_yCyD = fst_yCyD(py_zs_c, py_zs_d, pz_s_d, w_s_c, w_s_d, k_1L, L)

        ###########################################################################
        ############################ M step #######################################
        ###########################################################################

        #=======================================================
        # Compute DGMM Parameters
        #=======================================================

        # Discrete head
        w_s_d = np.mean(ps_y_d, axis = 0)
        eta_d_barL, H_d_barL, psi_d_barL = M_step_DGMM(Ez_ys_d, E_z1z2T_ys_d,
                                        E_z2z2T_ys_d, EeeT_ys_d, ps_y_d, H_d,
                                        k_1L['d'][:-1], L_1L['d'], r_1L['d'])

        # Add dispatching function here
        eta_d[:bar_L['d']] = eta_d_barL
        H_d[:bar_L['d']] = H_d_barL
        psi_d[:bar_L['d']] = psi_d_barL

        # Continuous head
        w_s_c = np.mean(ps_y_c, axis = 0)
        eta_c_barL, H_c_barL, psi_c_barL = M_step_DGMM(Ez_ys_c, E_z1z2T_ys_c,
                                        E_z2z2T_ys_c, EeeT_ys_c, ps_y_c, H_c,
                                        k_1L['c'][:-1], L_1L['c'] + 1, r_1L['c'])

        eta_c[:bar_L['c']] = eta_c_barL
        H_c[:bar_L['c']] = H_c_barL
        psi_c[:bar_L['c']] = psi_c_barL

        # Common tail
        eta_t, H_t, psi_t, Ezst_y = M_step_DGMM_t(Ez_ys_t, E_z1z2T_ys_t,
                                        E_z2z2T_ys_t, EeeT_ys_t, ps_y_c, ps_y_d,
                                        pst_yCyD, H_c[bar_L['c']:], S_1L, k_1L,
                                        L_1L, L, r_1L['t'])

        # The tail parameters are shared by both heads
        eta_d[bar_L['d']:] = eta_t
        H_d[bar_L['d']:] = H_t
        psi_d[bar_L['d']:] = psi_t

        eta_c[bar_L['c']:] = eta_t
        H_c[bar_L['c']:] = H_t
        psi_c[bar_L['c']:] = psi_t

        #=======================================================
        # Identifiability conditions
        #=======================================================

        w_s_t = np.mean(pst_yCyD, axis = 0)
        eta_d, H_d, psi_d, AT_d, eta_c, H_c, psi_c, AT_c = network_identifiability(
            eta_d, H_d, psi_d, eta_c, H_c, psi_c, w_s_c, w_s_d, w_s_t, bar_L)

        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================

        # We optimize each column separately as it is faster than all column jointly
        # (and more relevant with the independence hypothesis)

        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y_d,
                    pzl1_ys_d, z_s_d[0], AT_d[0], tol = tol, maxstep = maxstep)

        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y_d,
                    pzl1_ys_d, z_s_d[0], AT_d[0], tol = tol, maxstep = maxstep)

        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y_d,
                    pzl1_ys_d, z_s_d[0], AT_d[0], tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################

        new_lik = np.sum(np.log(py_d) + np.log(py_c))
        likelihood.append(new_lik)
        ratio = (new_lik - prev_lik)/abs(prev_lik)

        if n_clusters == 'multi':
            temp_classes = []
            z_tail = []

            for l in clustering_layer:
                idx_to_sum = tuple(set(range(1, L['t'] + 1)) -
                                   set([clustering_layer[l] + 1]))
                psl_y = pst_yCyD.reshape(numobs, *k['t'],
                                         order = 'C').sum(idx_to_sum)

                temp_class_l = np.argmax(psl_y, axis = 1)
                # Same guard as in the single-layer branch below
                try:
                    sil_l = silhouette_score(dm, temp_class_l,
                                             metric = 'precomputed')
                except ValueError:
                    sil_l = -1

                temp_classes.append(temp_class_l)
                new_sil[l] = sil_l

            for l in range(L['t'] - 1):
                zl = Ezst_y[l].sum(1)
                z_tail.append(zl)

                if best_sil[l] < new_sil[l]:
                    # Update the quantity if the silhouette score is better
                    best_sil[l] = deepcopy(new_sil[l])
                    classes[l] = deepcopy(temp_classes[l])

                    if zl.shape[-1] == 3:
                        plot_3d(zl, classes[l])
                    elif zl.shape[-1] == 2:
                        plot_2d(zl, classes[l])

        else:
            idx_to_sum = tuple(set(range(1, L['t'] + 1)) -
                               set([clustering_layer + 1]))
            psl_y = pst_yCyD.reshape(numobs, *k['t'], order = 'C').sum(idx_to_sum)

            temp_classes = np.argmax(psl_y, axis = 1)
            try:
                new_sil = silhouette_score(dm, temp_classes, metric = 'precomputed')
            except ValueError:
                # Only the scorer's own rejection is mapped to the worst score;
                # any other error is allowed to propagate.
                new_sil = -1

            z_tail = [Ezst_y[l].sum(1) for l in range(L['t'] - 1)]

            if best_sil < new_sil:
                # Update the quantity if the silhouette score is better
                zl = z_tail[clustering_layer]
                best_sil = deepcopy(new_sil)
                classes = deepcopy(temp_classes)

                if zl.shape[-1] == 3:
                    plot_3d(zl, classes)
                elif zl.shape[-1] == 2:
                    plot_2d(zl, classes)

        # Refresh the likelihood if best
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)

        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 1, r_1L, numobs)
        else:
            patience += 1

        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################

        min_nb_clusters = 2
        is_not_min_specif = not(is_min_architecture_reached(k, r, min_nb_clusters))

        if look_for_simpler_network(it_num) & perform_selec & is_not_min_specif:
            # To add: selection according to categ
            r_to_keep = r_select(y_bin, y_ord, y_categ, yc, zl1_ys_d,
                                 z2_z1s_d[:bar_L['d']], w_s_d,
                                 z2_z1s_c[:bar_L['c']], z2_z1s_c[bar_L['c']:],
                                 n_clusters)

            # Check layer deletion
            is_c_layer_deletion = np.any([len(rl) == 0 for rl in r_to_keep['c']])
            is_d_layer_deletion = np.any([len(rl) == 0 for rl in r_to_keep['d']])
            is_head_layer_deletion = np.any([is_c_layer_deletion,
                                             is_d_layer_deletion])

            if is_head_layer_deletion:
                # Restart the algorithm
                if is_c_layer_deletion:
                    r['c'] = [len(rl) for rl in r_to_keep['c'][:-1]]
                    k['c'] = k['c'][:-1]
                if is_d_layer_deletion:
                    r['d'] = [len(rl) for rl in r_to_keep['d'][:-1]]
                    k['d'] = k['d'][:-1]

                init = dim_reduce_init(pd.DataFrame(y), n_clusters, k, r, nj,
                                       var_distrib, seed = None)
                eta_c, eta_d, H_c, H_d, psi_c, psi_d = dispatch_dgmm_init(init)
                lambda_bin, lambda_ord, lambda_categ = dispatch_gllvm_init(init)
                w_s_c, w_s_d = dispatch_paths_init(init)

                # *_1L stands for quantities going through all the network (head + tail)
                k_1L, L_1L, L, bar_L, S_1L = nb_comps_and_layers(k)
                r_1L = {'c': r['c'] + r['t'], 'd': r['d'] + r['t'], 't': r['t']}

                M = M_growth(it_num + 1, r_1L, numobs)

                prev_lik = deepcopy(new_lik)
                it_num = it_num + 1
                print(likelihood)
                print('Restarting the algorithm')
                continue

            # If r_l == 0, delete the last l + 1: layers
            new_Lt = np.sum([len(rl) != 0 for rl in r_to_keep['t']]) #- 1

            k_to_keep = k_select(w_s_c, w_s_d, w_s_t, k, new_Lt,
                                 clustering_layer, n_clusters)

            is_selection = check_if_selection(r_to_keep, r, k_to_keep, k, L, new_Lt)

            assert new_Lt > 0 # > 1 ?
            if n_clusters == 'multi':
                assert new_Lt == L['t']

            if is_selection:
                # Part to change when update also number of layers on each head
                nb_deleted_layers_tail = L['t'] - new_Lt
                L['t'] = new_Lt
                L_1L = {keys: values - nb_deleted_layers_tail
                        for keys, values in L_1L.items()}

                eta_c, eta_d, H_c, H_d, psi_c, psi_d = dgmm_coeff_selection(eta_c,
                            H_c, psi_c, eta_d, H_d, psi_d, L, r_to_keep, k_to_keep)

                lambda_bin, lambda_ord, lambda_categ = gllvm_coeff_selection(
                            lambda_bin, lambda_ord, lambda_categ, r, r_to_keep)

                w_s_c, w_s_d = path_proba_selection(w_s_c, w_s_d, k, k_to_keep,
                                                    new_Lt)

                k = {h: [len(k_to_keep[h][l]) for l in range(L[h])]
                     for h in ['d', 't']}
                k['c'] = [len(k_to_keep['c'][l]) for l in range(L['c'] + 1)]

                r = {h: [len(r_to_keep[h][l]) for l in range(L[h])]
                     for h in ['d', 't']}
                r['c'] = [len(r_to_keep['c'][l]) for l in range(L['c'] + 1)]

                k_1L, _, L, bar_L, S_1L = nb_comps_and_layers(k)
                r_1L = {'c': r['c'] + r['t'], 'd': r['d'] + r['t'], 't': r['t']}

                patience = 0
                best_r = deepcopy(r)
                best_k = deepcopy(k)

                #=======================================================
                # Identifiability conditions
                #=======================================================
                eta_d, H_d, psi_d, AT_d, eta_c, H_c, psi_c, AT_c = network_identifiability(
                    eta_d, H_d, psi_d, eta_c, H_c, psi_c, w_s_c, w_s_d, w_s_t, bar_L)

            # NOTE(review): the nesting of the statements from here to the end
            # of the loop body was inferred (the reviewed source had lost its
            # indentation) -- confirm against the upstream file.
            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S_1L', S_1L)
            print("w_s_c", len(w_s_c))
            print("w_s_d", len(w_s_d))

        M = M_growth(it_num + 1, r_1L, numobs)

        prev_lik = deepcopy(new_lik)
        print(likelihood)
        print('Silhouette score:', new_sil)
        it_num = it_num + 1

    out = dict(likelihood = likelihood, classes = classes,
               best_r = best_r, best_k = best_k)

    if n_clusters == 'multi':
        out['z'] = z_tail
    else:
        out['z'] = z_tail[clustering_layer]

    return(out)
def closeleq(x, y):
    ''' Element-wise "x <= y" that also holds where x and y are numerically close

    x, y (scalars or arrays): The values to compare
    ------------------------------------------------------------------
    returns (bool or ndarray of bool): True where np.isclose(x, y) or x <= y
    '''
    nearly_equal = np.isclose(x, y)
    not_above = x <= y
    return np.logical_or(nearly_equal, not_above)
def closegeq(x, y):
    ''' Element-wise "x >= y" that also holds where x and y are numerically close

    x, y (scalars or arrays): The values to compare
    ------------------------------------------------------------------
    returns (bool or ndarray of bool): True where np.isclose(x, y) or x >= y
    '''
    nearly_equal = np.isclose(x, y)
    not_below = x >= y
    return np.logical_or(nearly_equal, not_below)
def dim_reduce_init(y, n_clusters, k, r, nj, var_distrib, seed=None):
    ''' Perform dimension reduction into a continuous r dimensional space and
    determine the init coefficients in that space

    y (numobs x p DataFrame): The data
    n_clusters (int or str): The number of clusters to look for, or 'auto' /
                            'multi'
    k (dict of lists): The number of components of each layer of the network
                       (keys 'c', 'd' and 't')
    r (dict of lists): The dimensions of the components of each layer of the
                       network (keys 'c', 'd' and 't')
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
                    For categorical data: the number of different existing categories for each variable
    var_distrib (p 1darray): An array containing the types of the variables in y
    seed (None): The random state seed to use for the dimension reduction
    ---------------------------------------------------------------------------------------
    returns (dict): All initialisation parameters: the per-head entries 'c' and
                    'd' (each holding 'eta', 'H', 'psi', 'w_s' and 'z'),
                    'classes', 'lambda_bin', 'lambda_ord' and 'lambda_categ'
    raises TypeError: if y is not a pandas DataFrame
    raises RuntimeError: if some paths of the network were not initialised
    raises ValueError: if n_clusters is neither numeric, 'auto' nor 'multi'
    '''

    # isinstance (rather than an exact type comparison) also accepts
    # DataFrame subclasses.
    if not isinstance(y, pd.DataFrame):
        raise TypeError('y should be a dataframe for prince')

    numobs = len(y)

    # Length of both heads and tail. L, bar_L and S might not be homogeneous
    # with the MDGMM notations
    bar_L = {'c': len(k['c']), 'd': len(k['d'])}
    L = {'c': len(k['c']), 'd': len(k['d']), 't': len(k['t']) - 1}

    # Paths of both heads and tail
    S = {'c': np.prod(k['c']), 'd': np.prod(k['d']), 't': np.prod(k['t'])}

    # Data of both heads
    yc = y.iloc[:, var_distrib == 'continuous'].values
    yd = y.iloc[:, var_distrib != 'continuous'].values

    #==============================================================
    # Dimension reduction performed with MCA on discrete data
    #==============================================================

    # Check input = False to remove
    mca = prince.MCA(n_components = r['d'][0], n_iter=3, copy=True,
                     check_input=False, engine='auto', random_state = seed)
    z1D = mca.fit_transform(yd.astype(str)).values

    y = y.values

    # Be careful: The first z^c is the continuous data whether the first
    # z^d is the MCA transformed data.

    #==============================================================
    # Set the shape parameters of each discrete data type
    #==============================================================

    is_bin = np.logical_or(var_distrib == 'bernoulli',
                           var_distrib == 'binomial')
    y_bin = y[:, is_bin]
    y_bin = y_bin.astype(int)
    nj_bin = nj[is_bin]
    nb_bin = len(nj_bin)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical']
    nb_categ = len(nj_categ)

    y_ord = y[:, var_distrib == 'ordinal']
    y_ord = y_ord.astype(int)
    nj_ord = nj[var_distrib == 'ordinal']
    nb_ord = len(nj_ord)

    ss = StandardScaler()
    yc = ss.fit_transform(yc)

    #=======================================================
    # Determining the Gaussian Parameters
    #=======================================================
    init = {}

    # Initialise both heads quantities
    eta_d, H_d, psi_d, zd, paths_pred_d = init_head(z1D, k['d'], r['d'],
                                                    numobs, L['d'])
    eta_c, H_c, psi_c, zc, paths_pred_c = init_head(yc, k['c'], r['c'],
                                                    numobs, L['c'])

    # Initialisation of the common layer. The coefficients are those between the last
    # Layer of both heads and the first junction layer
    eta_h_last, H_h_last, psi_h_last, paths_pred_h_last, zt_first = \
        init_junction_layer(r, k, zc, zd)

    eta_d.append(eta_h_last['d'])
    H_d.append(H_h_last['d'])
    psi_d.append(psi_h_last['d'])

    eta_c.append(eta_h_last['c'])
    H_c.append(H_h_last['c'])
    psi_c.append(psi_h_last['c'])

    paths_pred_d.append(paths_pred_h_last['d'])
    paths_pred_c.append(paths_pred_h_last['c'])

    zt = [zt_first]

    # Initialisation of the following common layers
    # (the same tail parameters are appended to both heads)
    for l in range(L['t']):
        params = get_MFA_params(zt[l], k['t'][l], r['t'][l:])
        eta_c.append(params['eta'][..., n_axis])
        eta_d.append(params['eta'][..., n_axis])

        H_c.append(params['H'])
        H_d.append(params['H'])

        psi_c.append(params['psi'])
        psi_d.append(params['psi'])

        zt.append(params['z_nextl'])
        zc.append(params['z_nextl'])
        zd.append(params['z_nextl'])

        paths_pred_c.append(params['classes'])
        paths_pred_d.append(params['classes'])

    paths_pred_c = np.stack(paths_pred_c).T
    paths_c, nb_paths_c = np.unique(paths_pred_c, return_counts=True, axis=0)
    paths_c, nb_paths_c = add_missing_paths(k['c'] + k['t'][:-1],
                                            paths_c, nb_paths_c)

    paths_pred_d = np.stack(paths_pred_d).T
    paths_d, nb_paths_d = np.unique(paths_pred_d, return_counts=True, axis=0)
    paths_d, nb_paths_d = add_missing_paths(k['d'] + k['t'][:-1],
                                            paths_d, nb_paths_d)

    # Empirical path frequencies; exact zeros are replaced by 1E-16
    w_s_c = nb_paths_c / numobs
    w_s_c = np.where(w_s_c == 0, 1E-16, w_s_c)

    w_s_d = nb_paths_d / numobs
    w_s_d = np.where(w_s_d == 0, 1E-16, w_s_d)

    # Tail path probabilities: sum the discrete-head axes out of w_s_d
    k_dt = k['d'] + k['t']
    w_s_t = w_s_d.reshape(*k_dt, order='C').sum(tuple(range(L['d'])))
    w_s_t = w_s_t.reshape(-1, order='C')

    # Check that all paths have been explored
    if (len(paths_c) != S['c'] * S['t']) or (len(paths_d) != S['d'] * S['t']):
        raise RuntimeError('Path initialisation failed')

    #=============================================================
    # Enforcing identifiability constraints over the first layer
    #=============================================================

    eta_d, H_d, psi_d, AT_d, eta_c, H_c, psi_c, AT_c = network_identifiability(
        eta_d, H_d, psi_d, eta_c, H_c, psi_c, w_s_c, w_s_d, w_s_t, bar_L)

    init['c'] = {}
    init['c']['eta'] = eta_c
    init['c']['H'] = H_c
    init['c']['psi'] = psi_c
    init['c']['w_s'] = w_s_c  # Probabilities of each path through the network
    init['c']['z'] = zc

    init['d'] = {}
    init['d']['eta'] = eta_d
    init['d']['H'] = H_d
    init['d']['psi'] = psi_d
    init['d']['w_s'] = w_s_d  # Probabilities of each path through the network
    init['d']['z'] = zd

    # The clustering layer is the one used to perform the clustering
    # i.e. the layer l such that k[l] == n_clusters
    if not (isnumeric(n_clusters)):
        if n_clusters == 'auto':
            # First tail layer is the default clustering layer in auto mode
            clustering_layer = L['c']
        elif n_clusters == 'multi':
            # NOTE(review): this selects columns 0..L['t']-1 of paths_pred_c,
            # whereas 'auto' starts at column L['c'] -- confirm the offset.
            clustering_layer = range(L['t'])
        else:
            raise ValueError(
                'Please enter an int, auto or multi for n_clusters')
    else:
        kc_complete = k['c'] + k['t'][:-1]
        common_clus_layer_idx = (np.array(kc_complete) == n_clusters)
        # Head layers are masked out before looking for the matching layer
        common_clus_layer_idx[:L['c']] = False
        clustering_layer = np.argmax(common_clus_layer_idx)

        assert clustering_layer >= L['c']

    init['classes'] = paths_pred_c[:, clustering_layer]

    #=======================================================
    # Determining the coefficients of the GLLVM layer
    #=======================================================

    # Determining lambda_bin coefficients.
    lambda_bin = np.zeros((nb_bin, r['d'][0] + 1))

    for j in range(nb_bin):
        Nj = int(np.max(y_bin[:, j]))  # The support of the jth binomial is [1, Nj]

        if Nj == 1:  # If the variable is Bernoulli not binomial
            yj = y_bin[:, j]
            z_new = zd[0]
        else:  # If not, need to convert Binomial output to Bernoulli output
            yj, z_new = bin_to_bern(Nj, y_bin[:, j], zd[0])

        lr = LogisticRegression()

        if j < r['d'][0] - 1:
            # Only the first j + 1 latent dimensions are used for this
            # variable; the remaining coefficients stay at zero.
            lr.fit(z_new[:, :j + 1], yj)
            lambda_bin[j, :j + 2] = np.concatenate(
                [lr.intercept_, lr.coef_[0]])
        else:
            lr.fit(z_new, yj)
            lambda_bin[j] = np.concatenate([lr.intercept_, lr.coef_[0]])

    ## Identifiability of bin coefficients
    lambda_bin[:, 1:] = lambda_bin[:, 1:] @ AT_d[0][0]

    # Determining lambda_ord coefficients
    lambda_ord = []

    for j in range(nb_ord):
        yj = y_ord[:, j]

        ol = OrderedLogit()
        ol.fit(zd[0], yj)

        ## Identifiability of ordinal coefficients
        beta_j = (ol.beta_.reshape(1, r['d'][0]) @ AT_d[0][0]).flatten()
        lambda_ord_j = np.concatenate([ol.alpha_, beta_j])
        lambda_ord.append(lambda_ord_j)

    # Determining lambda_categ coefficients
    lambda_categ = []

    for j in range(nb_categ):
        yj = y_categ[:, j]

        lr = LogisticRegression(multi_class='multinomial')
        lr.fit(zd[0], yj)

        ## Identifiability of categ coefficients
        beta_j = lr.coef_ @ AT_d[0][0]
        lambda_categ.append(np.hstack([lr.intercept_[..., n_axis], beta_j]))

    init['lambda_bin'] = lambda_bin
    init['lambda_ord'] = lambda_ord
    init['lambda_categ'] = lambda_categ

    return init
'ordinal', 'categorical', 'categorical', 'categorical',\ 'categorical', 'bernoulli', 'ordinal', 'ordinal',\ 'continuous', 'categorical', 'bernoulli']) # Plotting utilities varnames = np.array(['age', 'workclass', 'fnlwgt',\ 'education.num', 'marital.status', 'occupation', 'relationship',\ 'race', 'sex', 'capital.gain', 'capital.loss',\ 'hours.per.week', 'native.country', 'income']) p = len(varnames) dtypes_dict = {'continuous': float, 'categorical': str, 'ordinal': float,\ 'bernoulli': str, 'binomial': int} cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli') #===================================== # Select the design #===================================== design = 'Absent' filenum = 1 sub_design = 'trivarié' prefix = design[:3] + '_' nb_files_per_design = 10 nb_pobs = 200 sub_aliases = {'bivarié': 'bivariate', 'trivarié': 'trivariate'} #===================================== # Import the train and test sets
lfp_tmp /= 100. # Scaling # remove evoked LFP lfp_tmp = lfp_tmp - np.mean(lfp_tmp, 2, keepdims=True) lfp[probe] = lfp_tmp # Desired CSD prediction locations z[probe] = np.stack([24. * np.ones(len(csd_loc[probe])), csd_loc[probe]]).T # %% Visualize data, check for outlier trials ol_bool = {} for probe in ['probeC', 'probeD']: trial_sd = np.std(lfp[probe], axis=2, keepdims=True) ol = np.any(np.abs(lfp[probe]) > 5 * trial_sd, axis=(0, 1)) ol_bool[probe] = ol ol = np.logical_or(ol_bool['probeC'], ol_bool['probeD']) print('outlier trials: %d' % np.sum(ol)) if plot_ol: for probe in ['probeC', 'probeD']: x1 = np.unique(x[probe][:, 0]) for j in x1: plt.figure(figsize=(6, 16)) for i, xi in enumerate(x[probe][x[probe][:, 0] == j]): plt.plot(t, xi[1] + 3 * lfp[probe][i, :, np.logical_not(ol)].T, 'k') plt.plot(t, xi[1] + 3 * lfp[probe][i, :, ol].T, 'r') plt.title('%s x1 = %0.2f microns' % (probe, j)) for probe in ['probeC', 'probeD']: lfp[probe] = lfp[probe][:, :, np.logical_not(ol)]
def dim_reduce_init(y, n_clusters, k, r, nj, var_distrib, use_famd=False, seed=None):
    ''' Perform dimension reduction into a continuous r[0]-dimensional space and
    determine the init coefficients in that space

    y (numobs x p DataFrame): The observations containing categorical variables
    n_clusters (int): The number of clusters to look for in the data
    k (1d array): The number of components of the latent Gaussian mixture layers
    r (list): The dimensions of the latent variables (r[0] is the dimension of the
                first latent space, r[l:] is passed to the l-th layer initialisation)
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    var_distrib (p 1darray): An array containing the types of the variables in y
    use_famd (Bool): Whether to use the FAMD method (True) or not (False) to initiate the
                    first continuous latent variable. Otherwise MCA is used.
                    (PCA is used when every variable is ordinal.)
    seed (None): The random state seed to use for the dimension reduction
    ---------------------------------------------------------------------------------------
    returns (dict): All initialisation parameters
    raises TypeError: if y is not a pandas DataFrame
    raises RuntimeError: if the number of paths differs from prod(k)
    '''

    if not isinstance(y, pd.DataFrame):
        raise TypeError('y should be a dataframe for prince')

    # The masks below rely on element-wise comparison and boolean indexing,
    # which only behave as intended on ndarrays (not on plain lists).
    var_distrib = np.asarray(var_distrib)
    nj = np.asarray(nj)

    L = len(k)
    numobs = len(y)
    S = np.prod(k)

    #==============================================================
    # Dimension reduction performed with PCA, FAMD or MCA
    #==============================================================

    if (var_distrib == 'ordinal').all():
        print('PCA init')

        pca = prince.PCA(n_components=r[0], n_iter=3, rescale_with_mean=True,
                         rescale_with_std=True, copy=True, check_input=True,
                         engine='auto', random_state=seed)
        z1 = pca.fit_transform(y).values

    elif use_famd:
        famd = prince.FAMD(n_components=r[0], n_iter=3, copy=True, check_input=False,
                           engine='auto', random_state=seed)
        z1 = famd.fit_transform(y).values

    else:
        # Check input = False to remove
        mca = prince.MCA(n_components=r[0], n_iter=3, copy=True,
                         check_input=False, engine='auto', random_state=seed)
        z1 = mca.fit_transform(y).values

    z = [z1]
    y = y.values

    #==============================================================
    # Set the shape parameters of each data type
    #==============================================================

    is_bin = np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')
    is_ord = var_distrib == 'ordinal'
    is_categ = var_distrib == 'categorical'
    is_cont = var_distrib == 'continuous'

    y_bin = y[:, is_bin].astype(int)
    nj_bin = nj[is_bin]
    nb_bin = len(nj_bin)

    y_ord = y[:, is_ord].astype(float).astype(int)
    nj_ord = nj[is_ord]
    nb_ord = len(nj_ord)

    y_categ = y[:, is_categ]
    nj_categ = nj[is_categ]
    nb_categ = len(nj_categ)

    # Set y_cont standard error to 1
    y_cont = y[:, is_cont]
    # Before was np.float
    y_cont = y_cont / np.std(y_cont.astype(float), axis=0, keepdims=True)
    nb_cont = y_cont.shape[1]

    #=======================================================
    # Determining the Gaussian Parameters
    #=======================================================
    init = {}

    eta = []
    H = []
    psi = []
    paths_pred = np.zeros((numobs, L))

    for l in range(L):
        params = get_MFA_params(z[l], k[l], r[l:])
        eta.append(params['eta'][..., n_axis])
        H.append(params['H'])
        psi.append(params['psi'])
        z.append(params['z_nextl'])
        paths_pred[:, l] = params['classes']

    paths, nb_paths = np.unique(paths_pred, return_counts=True, axis=0)
    paths, nb_paths = add_missing_paths(k, paths, nb_paths)

    w_s = nb_paths / numobs
    # Avoid exactly-zero path probabilities
    w_s = np.where(w_s == 0, 1E-16, w_s)

    # Check all paths have been explored
    if len(paths) != S:
        raise RuntimeError(
            'Real path len is {} while the initial number of path was only {}'
            .format(S, len(paths)))

    w_s = w_s.reshape(*k).flatten('C')

    #=============================================================
    # Enforcing identifiability constraints over the first layer
    #=============================================================

    H = diagonal_cond(H, psi)
    Ez, AT = compute_z_moments(w_s, eta, H, psi)
    eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

    init['eta'] = eta
    init['H'] = H
    init['psi'] = psi

    init['w_s'] = w_s  # Probabilities of each path through the network
    init['z'] = z

    # The clustering layer is the one used to perform the clustering
    # i.e. the layer l such that k[l] == n_clusters
    # (np.argmax falls back to layer 0 when no layer matches)
    clustering_layer = np.argmax(np.array(k) == n_clusters)

    init['classes'] = paths_pred[:, clustering_layer]  # 0 To change with clustering_layer_idx

    #=======================================================
    # Determining the coefficients of the GLLVM layer
    #=======================================================

    # Determining lambda_bin coefficients.
    lambda_bin = np.zeros((nb_bin, r[0] + 1))

    for j in range(nb_bin):
        Nj = np.max(y_bin[:, j])  # The support of the jth binomial is [1, Nj]

        if Nj == 1:  # If the variable is Bernoulli not binomial
            yj = y_bin[:, j]
            z_new = z[0]
        else:  # If not, need to convert Binomial output to Bernoulli output
            yj, z_new = bin_to_bern(Nj, y_bin[:, j], z[0])

        lr = LogisticRegression()

        # The first r[0] - 1 variables are regressed on a growing number of
        # latent dimensions; the remaining coefficients stay at zero.
        if j < r[0] - 1:
            lr.fit(z_new[:, :j + 1], yj)
            lambda_bin[j, :j + 2] = np.concatenate([lr.intercept_, lr.coef_[0]])
        else:
            lr.fit(z_new, yj)
            lambda_bin[j] = np.concatenate([lr.intercept_, lr.coef_[0]])

    ## Identifiability of bin coefficients
    lambda_bin[:, 1:] = lambda_bin[:, 1:] @ AT[0][0]

    # Determining lambda_ord coefficients
    lambda_ord = []

    for j in range(nb_ord):
        yj = y_ord[:, j]

        ol = OrderedLogit()
        ol.fit(z[0], yj)

        ## Identifiability of ordinal coefficients
        beta_j = (ol.beta_.reshape(1, r[0]) @ AT[0][0]).flatten()
        lambda_ord_j = np.concatenate([ol.alpha_, beta_j])
        lambda_ord.append(lambda_ord_j)

    # Determining the coefficients of the continuous variables
    lambda_cont = np.zeros((nb_cont, r[0] + 1))

    for j in range(nb_cont):
        yj = y_cont[:, j]
        linr = LinearRegression()

        if j < r[0] - 1:
            linr.fit(z[0][:, :j + 1], yj)
            lambda_cont[j, :j + 2] = np.concatenate([[linr.intercept_], linr.coef_])
        else:
            linr.fit(z[0], yj)
            lambda_cont[j] = np.concatenate([[linr.intercept_], linr.coef_])

    ## Identifiability of continuous coefficients
    lambda_cont[:, 1:] = lambda_cont[:, 1:] @ AT[0][0]

    # Determining lambda_categ coefficients
    lambda_categ = []

    for j in range(nb_categ):
        yj = y_categ[:, j]

        lr = LogisticRegression(multi_class='multinomial')
        lr.fit(z[0], yj)

        ## Identifiability of categ coefficients
        beta_j = lr.coef_ @ AT[0][0]
        lambda_categ.append(np.hstack([lr.intercept_[..., n_axis], beta_j]))

    init['lambda_bin'] = lambda_bin
    init['lambda_ord'] = lambda_ord
    init['lambda_cont'] = lambda_cont
    init['lambda_categ'] = lambda_categ

    return init
def M1DGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50,
           eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,
           dm = [], max_patience = 1, use_silhouette = True):  # dm small hack to remove
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int or 'auto'): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment).
                    The global numpy state is left untouched when seed is None.
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (ndarray): A precomputed distance matrix used for the silhouette score. When empty,
                    the Gower matrix of y is computed. The default list is never mutated.
    max_patience (int): The iterations stop once the likelihood has failed to increase
                    more than max_patience times in a row
    use_silhouette (Bool): If True use the silhouette as quality criterion (best for clustering) else use
                            the likelihood (best for data augmentation).
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    prev_lik = - 1E16
    best_lik = -1E16

    best_sil = -1
    new_sil = -1

    tol = 0.01
    patience = 0
    is_looking_for_better_arch = False

    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_cont = deepcopy(init['lambda_cont'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(init['w_s'])  # Probability of path s' through the network for all s' in Omega

    numobs = len(y)
    likelihood = []
    silhouette = []
    it_num = 0
    ratio = 1000

    # Seed the global numpy generator. The previous `np.random.seed = seed`
    # replaced the seeding function by the seed value instead of calling it.
    if seed is not None:
        np.random.seed(seed)

    out = {}  # Store the full output

    # Dispatch variables between categories
    is_bin = np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')

    y_bin = y[:, is_bin]
    nj_bin = nj[is_bin].astype(int)
    nb_bin = len(nj_bin)

    y_ord = y[:, var_distrib == 'ordinal']
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)

    y_cont = y[:, var_distrib == 'continuous'].astype(float)
    nb_cont = y_cont.shape[1]

    # Set y_cont standard error to 1
    y_cont = y_cont / y_cont.std(axis = 0, keepdims = True)

    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])
    M = M_growth(1, r, numobs)

    assert nb_bin + nb_ord + nb_cont + nb_categ > 0
    if nb_bin + nb_ord + nb_cont + nb_categ != len(var_distrib):
        raise ValueError('Some variable types were not understood, '
                         'existing types are: continuous, categorical, '
                         'ordinal, binomial and bernoulli')

    # Compute the Gower matrix
    if len(dm) == 0:
        cat_features = np.logical_or(var_distrib == 'categorical',
                                     var_distrib == 'bernoulli')
        dm = gower_matrix(y, cat_features = cat_features)

    # Do not stop the iterations if there are some iterations left or if the likelihood is increasing
    # or if we have not reached the maximum patience and if a new architecture was looked for
    # in the previous iteration
    while ((it_num < it) and (ratio > eps) and (patience <= max_patience)) \
            or is_looking_for_better_arch:
        print(it_num)

        # The clustering layer is the one used to perform the clustering
        # i.e. the layer l such that k[l] == n_clusters
        # (recomputed at each iteration since k may change with the architecture)
        if not(isnumeric(n_clusters)):
            if n_clusters == 'auto':
                clustering_layer = 0
            else:
                raise ValueError('Please enter an int or "auto" for n_clusters')
        else:
            assert (np.array(k) == n_clusters).any()
            clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################################
        ################################# S step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================

        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)

        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================

        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)

        #=======================================================================
        # Compute the p(y| z1) for all variable categories
        #=======================================================================

        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,
                        lambda_categ, y_categ, nj_categ, y_cont, lambda_cont, z_s[0])

        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================

        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)

        #####################################################################################
        ################################# E step ############################################
        #####################################################################################

        #=====================================================================
        # Compute conditional probabilities used in the appendix of asta paper
        #=====================================================================

        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s, py_zl1)

        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================

        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================

        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)

        ###########################################################################
        ############################ M step #######################################
        ###########################################################################

        #=======================================================
        # Compute MFA Parameters
        #=======================================================

        w_s = np.mean(ps_y, axis = 0)
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H, k)

        #=======================================================
        # Identifiability conditions
        #=======================================================

        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

        del(Ez)

        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================

        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],
                                      tol = tol, maxstep = maxstep)

        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],
                                      tol = tol, maxstep = maxstep)

        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],
                                          tol = tol, maxstep = maxstep)

        lambda_cont = cont_params_GLLVM(y_cont, lambda_cont, ps_y, pzl1_ys, z_s[0], AT[0],
                                        tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################

        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        # NOTE(review): new_sil still holds the value of the previous iteration
        # here, so the stored silhouette trace lags the likelihood trace by one.
        silhouette.append(new_sil)
        ratio = abs((new_lik - prev_lik)/prev_lik)

        # Marginalise the path posterior over every layer but the clustering one
        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum)

        temp_class = np.argmax(psl_y, axis = 1)
        try:
            new_sil = silhouette_score(dm, temp_class, metric = 'precomputed')
        except ValueError:
            new_sil = -1

        # Store the params according to the silhouette or likelihood
        is_better = (best_sil < new_sil) if use_silhouette else (best_lik < new_lik)

        if is_better:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)

            # Store the output
            out['classes'] = deepcopy(classes)
            out['best_z'] = deepcopy(z_s[0])
            out['Ez.y'] = z
            out['best_k'] = deepcopy(k)
            out['best_r'] = deepcopy(r)
            out['best_w_s'] = deepcopy(w_s)
            out['lambda_bin'] = deepcopy(lambda_bin)
            out['lambda_ord'] = deepcopy(lambda_ord)
            out['lambda_categ'] = deepcopy(lambda_categ)
            out['lambda_cont'] = deepcopy(lambda_cont)

            out['eta'] = deepcopy(eta)
            out['mu'] = deepcopy(mu_s)
            out['sigma'] = deepcopy(sigma_s)

            out['psl_y'] = deepcopy(psl_y)
            out['ps_y'] = deepcopy(ps_y)

        # Keep track of the best likelihood reached so far. It was previously
        # refreshed with prev_lik, which made it lag behind and let
        # non-improving iterations pass the `best_lik < new_lik` test.
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)

        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1

        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################
        min_nb_clusters = 2

        if isnumeric(n_clusters):  # To change when add multi mode
            is_not_min_specif = not(np.all(np.array(k) == n_clusters)
                                    & np.array_equal(r, [2, 1]))
        else:
            is_not_min_specif = not(np.all(np.array(k) == min_nb_clusters)
                                    & np.array_equal(r, [2, 1]))

        is_looking_for_better_arch = look_for_simpler_network(it_num) \
            & perform_selec & is_not_min_specif

        if is_looking_for_better_arch:
            r_to_keep = r_select(y_bin, y_ord, y_categ, y_cont, zl1_ys, z2_z1s, w_s)

            # If r_l == 0, delete the last l + 1: layers
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1

            k_to_keep = k_select(w_s, k, new_L, clustering_layer,
                                 not(isnumeric(n_clusters)))

            is_L_unchanged = (L == new_L)
            is_r_unchanged = np.all([len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all([len(k_to_keep[l]) == k[l] for l in range(new_L)])

            is_selection = not(is_r_unchanged & is_k_unchanged & is_L_unchanged)

            assert new_L > 0

            if is_selection:
                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]

                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]

                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]

                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1])
                    lambda_bin = lambda_bin[:, bin_r_to_keep]

                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    # (r still holds the dimensions of the previous architecture)
                    lambda_ord_intercept = [lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord]
                    Lambda_ord_var = np.stack([lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])
                                  for j in range(nb_ord)]

                # To recheck
                if nb_cont > 0:
                    # Add the intercept:
                    cont_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1])
                    lambda_cont = lambda_cont[:, cont_r_to_keep]

                if nb_categ > 0:
                    lambda_categ_intercept = [lambda_categ[j][:, 0] for j in range(nb_categ)]
                    Lambda_categ_var = [lambda_categ_j[:, -r[0]:]
                                        for lambda_categ_j in lambda_categ]
                    # Select the kept dimensions among the slope columns only.
                    # Indexing the full matrices here would shift every kept
                    # column by one because of the leading intercept column.
                    Lambda_categ_var = [lambda_categ_var_j[:, r_to_keep[0]]
                                        for lambda_categ_var_j in Lambda_categ_var]

                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis],
                                               Lambda_categ_var[j]])
                                    for j in range(nb_categ)]

                w = w_s.reshape(*k, order = 'C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])

                # If layer deletion, sum the last components of the paths
                if L > new_L:
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(order = 'C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order = 'C')

                w_s /= w_s.sum()

                # Refresh the classes: TO RECHECK
                #idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
                #ps_y_tmp = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum)
                #np.argmax(ps_y_tmp[:, k_to_keep[0]], axis = 1)

                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]

                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])
                L = new_L

                patience = 0

                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

                del(Ez)

                print('New architecture:')
                print('k', k)
                print('r', r)
                print('L', L)
                print('S',S)
                print("w_s", len(w_s))

        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1
        print(likelihood)
        print(silhouette)

    out['likelihood'] = likelihood
    out['silhouette'] = silhouette

    return(out)
# Encode the bernoulli columns as integer labels
le = LabelEncoder()
for col_idx, colname in enumerate(y.columns):
    if var_distrib[col_idx] == 'bernoulli':
        y[colname] = le.fit_transform(y[colname])

# NOTE(review): recent scikit-learn releases renamed `sparse` to
# `sparse_output`; confirm against the pinned scikit-learn version.
enc = OneHotEncoder(sparse=False, drop='first')
labels_oh = enc.fit_transform(np.array(labels).reshape(-1, 1)).flatten()

nj, nj_bin, nj_ord, nj_categ = compute_nj(y, var_distrib)
y_np = y.values
nb_cont = np.sum(var_distrib == 'continuous')

p_new = y.shape[1]

# Feature category (cf)
cf_non_enc = np.logical_or(vd_categ_non_enc == 'categorical',
                           vd_categ_non_enc == 'bernoulli')

# Non encoded version of the dataset:
# (the `np.object` alias no longer exists in NumPy >= 1.24; the builtin
# `object` is what the alias pointed to)
y_nenc_typed = y_categ_non_enc.astype(object)
y_np_nenc = y_nenc_typed.values

# Defining distances over the non encoded features
dm = gower_matrix(y_nenc_typed, cat_features=cf_non_enc)

# Categorical and bernoulli columns are typed as strings, the others as floats
# (`np.str` was likewise only an alias of the builtin `str`)
dtype = {y.columns[j]: np.float64 if (var_distrib[j] != 'bernoulli') and
         (var_distrib[j] != 'categorical') else str for j in range(p_new)}

y = y.astype(dtype, copy=True)

#===========================================#
# Running the algorithm
def stat_all(z, target, var_distrib, weights, lambda_bin, nj_bin, lambda_categ, nj_categ,
             lambda_ord, nj_ord, lambda_cont, y_std):
    ''' Weighted distance between the statistics obtained from the latent
    point z and the target values, summed over all variable types.

    The binary/count and continuous terms are squared relative errors
    (the normalisation falls back to 1 for non-positive targets). The
    categorical and ordinal terms are computed from the entries of the
    stat_categ / stat_ord outputs located at the target index.
    Returns count_dist + categ_dist + ord_dist + cont_dist.
    '''
    # scipy's minimize hands z over as a flat vector: restore the leading axis
    if z.ndim == 1:
        z = z[n_axis]

    bin_mask = np.logical_or(var_distrib == 'binomial', var_distrib == 'bernoulli')
    cont_mask = var_distrib == 'continuous'
    categ_mask = var_distrib == 'categorical'
    ord_mask = var_distrib == 'ordinal'

    #=================================
    # Binary and count variables
    #=================================
    bin_target = target[bin_mask]
    bin_pred = stat_bin(lambda_bin, z, nj_bin)
    bin_scale = np.where(bin_target > 0, bin_target, 1)
    count_dist = np.sum(((bin_pred - bin_target) / bin_scale)**2 * weights[bin_mask])

    #=================================
    # Continuous variables
    #=================================
    cont_target = target[cont_mask]
    # Rescale the prediction by y_std before comparing it with the target
    cont_pred = stat_cont(lambda_cont, z) * y_std
    cont_scale = np.where(cont_target > 0, cont_target, 1)
    cont_dist = np.sum(((cont_pred - cont_target) / cont_scale) ** 2 * weights[cont_mask])

    #=================================
    # Categorical variables
    #=================================
    categ_target = target[categ_mask]
    categ_pred = stat_categ(lambda_categ, z, nj_categ)
    categ_terms = [(1 - categ_pred[j][int(categ_target[j])])**2
                   for j in range(len(nj_categ))]
    categ_dist = np.sum(categ_terms * weights[categ_mask])

    #=================================
    # Ordinal variables
    #=================================
    ord_target = target[ord_mask]
    ord_pred = stat_ord(lambda_ord, z, nj_ord)
    ord_terms = []
    for j in range(len(nj_ord)):
        level = int(ord_target[j])
        step = ord_pred[j][level + 1] - ord_pred[j][level]
        # NOTE(review): this is 1 - step**2 whereas the categorical term is
        # (1 - p)**2; confirm which form is intended.
        ord_terms.append(1 - step**2)
    ord_dist = np.sum(ord_terms * weights[ord_mask])

    return count_dist + categ_dist + ord_dist + cont_dist
def _collect_rmses(init_T, final_T, A, B, rho, omega):
    """Return the 12 RMSE values for the initial and final poses on (A, B).

    The order is the one handed to display_and_save_rmse: overall
    unweighted (init, final), overall weighted (init, final), unweighted
    R/t (init R, init t, final R, final t), then weighted R/t in the same
    order.
    """
    (init_w, final_w, init_R_w, init_t_w,
     final_R_w, final_t_w) = compute_rmse_weighted(init_T, final_T, A, B,
                                                   rho, omega)
    (init_u, final_u, init_R_u, init_t_u,
     final_R_u, final_t_u) = compute_rmse_unweighted(init_T, final_T, A, B)

    return [
        init_u, final_u, init_w, final_w,
        init_R_u, init_t_u, final_R_u, final_t_u,
        init_R_w, init_t_w, final_R_w, final_t_w
    ]


def cross_validation(odom_1, aligned_1, odom_2, aligned_2, type_1, type_2, K=10):
    """Function to run cross-validation to run nonlinear optimization for
    optimal pose estimation and evaluation. Performs cross-validation K times
    and splits the dataset into K (approximately) even splits, to be used for
    in-sample training and out-of-sample evaluation.

    This function estimates a relative transformation between two lidar
    frames using nonlinear optimization, and evaluates the robustness of this
    estimate through K-fold cross-validation performance of our framework.
    Though this function does not return any values, it saves all results in
    the 'results' relative path.

    Parameters:
        odom_1 (pd.DataFrame): DataFrame corresponding to odometry data for
            the pose we wish to transform into the odom_2 frame of reference.
            See data/main_odometry.csv for an example of the
            headers/columns/data types this function expects this DataFrame
            to have.
        aligned_1 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.
            This data corresponds to the odom_1 sensor frame.
        odom_2 (pd.DataFrame): DataFrame corresponding to odometry data for
            the pose we wish to transform the odom_1 frame of reference into.
            See data/main_odometry.csv for an example of the
            headers/columns/data types this function expects this DataFrame
            to have.
        aligned_2 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.
            This data corresponds to the odom_2 sensor frame.
        type_1 (str): String denoting the lidar type. Should be in the set
            {'main', 'front', 'rear'}. This type corresponds to the data type
            for the odom_1 frame.
        type_2 (str): String denoting the lidar type. Should be in the set
            {'main', 'front', 'rear'}. This type corresponds to the data type
            for the odom_2 frame.
        K (int): The number of folds to be used for cross-validation.
            Defaults to 10.

    Raises:
        FileNotFoundError: If the pickle file holding the initial relative
            transforms (PKL_POSES_PATH) does not exist.
    """
    # Get ICP covariance data; only the per-sample rejection flags (last
    # returned item) are used below.
    *_, odom1_reject = parse_icp_cov(odom_1, type=type_1,
                                     reject_thr=REJECT_THR)
    *_, odom2_reject = parse_icp_cov(odom_2, type=type_2,
                                     reject_thr=REJECT_THR)

    # Calculate relative poses
    (odom1_aligned, odom1_rel_poses) = \
        relative_pose_processing.calc_rel_poses(aligned_1)
    (odom2_aligned, odom2_rel_poses) = \
        relative_pose_processing.calc_rel_poses(aligned_2)

    # Compute weights for weighted estimate
    cov_t_odom1, cov_R_odom1 = compute_weights_euler(odom1_aligned)
    cov_t_odom2, cov_R_odom2 = compute_weights_euler(odom2_aligned)

    # Extract a single scalar (mode="max") from rotation and translation
    var_t_odom1 = extract_variance(cov_t_odom1, mode="max")
    var_R_odom1 = extract_variance(cov_R_odom1, mode="max")
    var_t_odom2 = extract_variance(cov_t_odom2, mode="max")
    var_R_odom2 = extract_variance(cov_R_odom2, mode="max")

    # Weights shared by every fold: the largest variance across the two
    # odometries.
    omega = np.max([var_R_odom1, var_R_odom2])
    rho = np.max([var_t_odom1, var_t_odom2])

    # Optimization (1) Instantiate a manifold
    translation_manifold = Euclidean(3)  # Translation vector
    so3 = Rotations(3)  # Rotation matrix
    manifold = Product((so3, translation_manifold))  # Instantiate manifold

    # Get initial guesses for our estimations. Without the file there is no
    # initial estimate to start from, so fail with an explicit error.
    if not os.path.exists(PKL_POSES_PATH):
        raise FileNotFoundError(
            "Initial relative transforms file not found: {}".format(
                PKL_POSES_PATH))
    transforms_dict = load_transforms(PKL_POSES_PATH)  # Relative transforms

    # Map types to sensor names to access initial estimate relative transforms
    types2sensors = {"main": "velodyne", "front": "front", "rear": "rear"}
    sensor_1 = types2sensors[type_1]
    sensor_2 = types2sensors[type_2]

    # Now get initial guesses from the relative poses
    initial_guess_odom1_odom2 = transforms_dict["{}_{}".format(sensor_1,
                                                               sensor_2)]

    # Print out all the initial estimates as poses
    print("INITIAL GUESS {} {}: \n {} \n".format(sensor_1, sensor_2,
                                                 initial_guess_odom1_odom2))

    # Get rotation matrices for initial guesses
    R0_odom1_odom2 = initial_guess_odom1_odom2[:3, :3]
    t0_odom1_odom2 = initial_guess_odom1_odom2[:3, 3]
    X0_odom1_odom2 = (R0_odom1_odom2, t0_odom1_odom2)  # Pymanopt estimate
    print("INITIAL GUESS {} {}: \n R0: \n {} \n\n t0: \n {} \n".format(
        sensor_1, sensor_2, R0_odom1_odom2, t0_odom1_odom2))

    # Create KFold xval object to get training/validation indices
    kf = KFold(n_splits=K, random_state=None, shuffle=False)

    # Dataset
    A = np.array(odom2_rel_poses)  # First set of poses
    B = np.array(odom1_rel_poses)  # Second set of poses
    N = len(A)
    assert len(A) == len(B)  # Sanity check to ensure odometry data matches
    reject_mask = np.logical_or(np.array(odom1_reject)[:N],
                                np.array(odom2_reject)[:N])  # Outlier rejection

    print("NUMBER OF CROSS-VALIDATION FOLDS: {}".format(K))

    # Perform K-fold cross-validation
    for k, (train_index, test_index) in enumerate(kf.split(A)):

        # Path for results from manifold optimization
        analysis_results_path = os.path.join(ANALYSIS_RESULTS_PATH,
                                             "k={}".format(k))
        final_estimates_path = os.path.join(FINAL_ESTIMATES_PATH,
                                            "k={}".format(k))
        odometry_plots_path = os.path.join(ODOMETRY_PLOTS_PATH,
                                           "k={}".format(k))

        # Make sure all paths exist - if they don't create them
        for path in [
                analysis_results_path, final_estimates_path,
                odometry_plots_path
        ]:
            check_dir(path)

        # Get training data
        A_train = A[train_index]
        B_train = B[train_index]
        N_train = min(A_train.shape[0], B_train.shape[0])
        r_train = reject_mask[train_index]
        print("FOLD NUMBER: {}, NUMBER OF TRAINING SAMPLES: {}".format(
            k, N_train))

        # The cost closes over this fold's training data; it is only called
        # by the solver within the same iteration.
        cost_lambda = lambda x: cost(x, A_train, B_train, r_train, rho, omega,
                                     WEIGHTED)  # Create cost function
        problem = Problem(manifold=manifold, cost=cost_lambda)  # Create problem
        solver = CustomSteepestDescent()  # Create custom solver
        X_opt = solver.solve(problem, x=X0_odom1_odom2)  # Solve problem

        print("Initial Guess for {}-{} Transformation: \n {}".format(
            sensor_1, sensor_2, initial_guess_odom1_odom2))
        print("Optimal solution between {} and {} "
              "reference frames: \n {}".format(sensor_1, sensor_2, X_opt))

        # Take intermediate values for plotting
        estimates_x = solver.estimates
        errors = solver.errors
        iters = solver.iterations

        # Metrics dictionary
        estimates_dict = dict(zip(iters, estimates_x))
        error_dict = dict(zip(iters, errors))

        # Save intermediate results to a pkl file
        estimates_fname = os.path.join(
            analysis_results_path,
            "estimates_{}_{}.pkl".format(sensor_1, sensor_2))
        error_fname = os.path.join(
            analysis_results_path,
            "error_{}_{}.pkl".format(sensor_1, sensor_2))

        # Save estimates to pickle file (the with-block closes the file)
        with open(estimates_fname, "wb") as pkl_estimates:
            pickle.dump(estimates_dict, pkl_estimates)

        # Save error to pickle file
        with open(error_fname, "wb") as pkl_error:
            pickle.dump(error_dict, pkl_error)

        # Calculate difference between initial guess and final
        X_opt_T = construct_pose(X_opt[0], X_opt[1].reshape((3, 1)))
        print("DIFFERENCE IN MATRICES: \n {}".format(
            np.subtract(X_opt_T, initial_guess_odom1_odom2)))

        # Compute, display and save the RMSEs (training/in-sample)
        train_rmses = _collect_rmses(initial_guess_odom1_odom2, X_opt_T,
                                     A_train, B_train, rho, omega)
        outpath = os.path.join(
            analysis_results_path,
            "train_rmse_{}_{}.txt".format(sensor_1, sensor_2))
        display_and_save_rmse(train_rmses, outpath)

        # Get test data
        A_test = A[test_index]
        B_test = B[test_index]
        N_test = min(A_test.shape[0], B_test.shape[0])
        print("NUMBER OF TEST SAMPLES: {}".format(N_test))

        # Compute, display and save the RMSEs (testing/out-of-sample)
        test_rmses = _collect_rmses(initial_guess_odom1_odom2, X_opt_T,
                                    A_test, B_test, rho, omega)
        outpath = os.path.join(
            analysis_results_path,
            "test_rmse_{}_{}.txt".format(sensor_1, sensor_2))
        display_and_save_rmse(test_rmses, outpath)

        # Save final estimates
        final_estimate_outpath = os.path.join(
            final_estimates_path,
            "{}_{}.txt".format(sensor_1, sensor_2))
        np.savetxt(final_estimate_outpath, X_opt_T)
def _calibrate_sensor_pair(first, second, A, B, reject_first, reject_second,
                           rho, omega, manifold, X0, initial_guess):
    """Optimize, report and persist the relative pose of one lidar pair.

    Runs the manifold optimization for a single sensor pair, pickles the
    per-iteration estimates and errors, writes the RMSE report and saves the
    final homogeneous transform.

    Args:
        first: Lower-case name of the first sensor (e.g. ``"main"``); used to
            build file names and console messages.
        second: Lower-case name of the second sensor (e.g. ``"front"``).
        A: Array holding the first set of relative poses.
        B: Array holding the second set of relative poses.
        reject_first: Per-sample rejection flags of one odometry.
        reject_second: Per-sample rejection flags of the other odometry.
        rho: Scalar translation variance handed to the cost / weighted RMSE.
        omega: Scalar rotation variance handed to the cost / weighted RMSE.
        manifold: Pymanopt manifold the problem is defined on.
        X0: Initial ``(R0, t0)`` point handed to the solver.
        initial_guess: 4x4 initial transform, used for reporting and RMSE.

    Returns:
        The optimum returned by the solver (indexed as ``[0]`` rotation and
        ``[1]`` translation below).
    """
    pair_key = "{}_{}".format(first, second)
    pair_title = "{}-{}".format(first.capitalize(), second.capitalize())

    # If either odometry has high variance for a sample, reject the sample.
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(reject_first[:N]), np.array(reject_second[:N]))

    # The cost closes over this call's own data, so pairs cannot leak into
    # each other the way lambdas over reassigned locals could.
    def pair_cost(x):
        return cost(x, A, B, r, rho, omega, WEIGHTED)

    # (2) Define the problem and (3) instantiate the solver
    problem = Problem(manifold=manifold, cost=pair_cost)
    solver = CustomSteepestDescent()
    Xopt = solver.solve(problem, x=X0)
    print("Initial Guess for {} Transformation: \n {}".format(
        pair_title, initial_guess))
    print("Optimal solution between {} and {} reference frames: \n {}".format(
        first, second, Xopt))

    # Take intermediate values for plotting
    estimates_x = solver.estimates
    errors = solver.errors
    iters = solver.iterations

    # Metrics dictionaries keyed by iteration
    estimates_dict = {i: T for i, T in zip(iters, estimates_x)}
    error_dict = {i: e for i, e in zip(iters, errors)}

    # Save intermediate results to pkl files (the context manager closes them)
    estimates_fname = os.path.join(ANALYSIS_RESULTS_PATH,
                                   "estimates_{}.pkl".format(pair_key))
    error_fname = os.path.join(ANALYSIS_RESULTS_PATH,
                               "error_{}.pkl".format(pair_key))
    with open(estimates_fname, "wb") as pkl_estimates:
        pickle.dump(estimates_dict, pkl_estimates)
    with open(error_fname, "wb") as pkl_error:
        pickle.dump(error_dict, pkl_error)

    # Calculate difference between initial guess and final estimate
    XOpt_T = construct_pose(Xopt[0], Xopt[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T, initial_guess)))

    # Compute the weighted and unweighted RMSE
    (rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted,
     rmse_init_t_weighted, rmse_final_R_weighted,
     rmse_final_t_weighted) = compute_rmse_weighted(
         initial_guess, XOpt_T, A, B, rho, omega)
    (rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted,
     rmse_init_t_unweighted, rmse_final_R_unweighted,
     rmse_final_t_unweighted) = compute_rmse_unweighted(
         initial_guess, XOpt_T, A, B)
    rmses = [
        rmse_init_unweighted, rmse_final_unweighted,
        rmse_init_weighted, rmse_final_weighted,
        rmse_init_R_unweighted, rmse_init_t_unweighted,
        rmse_final_R_unweighted, rmse_final_t_unweighted,
        rmse_init_R_weighted, rmse_init_t_weighted,
        rmse_final_R_weighted, rmse_final_t_weighted,
    ]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH,
                           "{}_rmse.txt".format(pair_key))
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH,
                                          "{}_final.txt".format(pair_key))
    np.savetxt(final_estimate_outpath, XOpt_T)
    return Xopt


def main():
    """Main function to run nonlinear manifold optimization on SE(3) to
    estimate an optimal relative pose transformation between coordinate
    frames given by the different lidar sensors.

    Reads the three odometry CSVs, derives scalar rotation/translation
    variances, loads the initial relative transforms from ``PKL_POSES_PATH``
    and calibrates the main-front, main-rear and front-rear pairs in turn.

    Raises:
        FileNotFoundError: If ``PKL_POSES_PATH`` does not exist; no initial
            guess is available without it.
    """
    # Extract and process the CSVs
    main_odometry = relative_pose_processing.process_df(MAIN_ODOM_CSV)
    front_odometry = relative_pose_processing.process_df(FRONT_ODOM_CSV)
    rear_odometry = relative_pose_processing.process_df(REAR_ODOM_CSV)

    # Process poses
    (main_aligned, front_aligned,
     rear_aligned) = relative_pose_processing.align_df(
         [main_odometry, front_odometry, rear_odometry])

    # Get ICP covariance information; only the trailing per-sample rejection
    # flags are used below.
    *_, main_reject = parse_icp_cov(main_odometry, type="main",
                                    reject_thr=REJECT_THR)
    *_, front_reject = parse_icp_cov(front_odometry, type="front",
                                     reject_thr=REJECT_THR)
    *_, rear_reject = parse_icp_cov(rear_odometry, type="rear",
                                    reject_thr=REJECT_THR)

    # Calculate relative poses
    (main_aligned,
     main_rel_poses) = relative_pose_processing.calc_rel_poses(main_aligned)
    (front_aligned,
     front_rel_poses) = relative_pose_processing.calc_rel_poses(front_aligned)
    (rear_aligned,
     rear_rel_poses) = relative_pose_processing.calc_rel_poses(rear_aligned)

    cov_t_main, cov_R_main = compute_weights_euler(main_aligned)
    cov_t_front, cov_R_front = compute_weights_euler(front_aligned)
    cov_t_rear, cov_R_rear = compute_weights_euler(rear_aligned)

    # Reduce every covariance to a single scalar (mode="max")
    var_t_main = extract_variance(cov_t_main, mode="max")
    var_R_main = extract_variance(cov_R_main, mode="max")
    var_t_front = extract_variance(cov_t_front, mode="max")
    var_R_front = extract_variance(cov_R_front, mode="max")
    # Fixed: this previously read cov_t_main, so the rear translation
    # variance silently duplicated the main lidar's value.
    var_t_rear = extract_variance(cov_t_rear, mode="max")
    var_R_rear = extract_variance(cov_R_rear, mode="max")

    # Optimization (1) Instantiate a manifold
    translation_manifold = Euclidean(3)  # Translation vector
    so3 = Rotations(3)  # Rotation matrix
    manifold = Product((so3, translation_manifold))  # Instantiate manifold

    # Get initial guesses for our estimations
    if not os.path.exists(PKL_POSES_PATH):
        raise FileNotFoundError(
            "Initial relative poses not found: {}".format(PKL_POSES_PATH))
    transforms_dict = load_transforms(PKL_POSES_PATH)  # Relative transforms

    # Now get initial guesses from the relative poses
    initial_guess_main_front = transforms_dict["velodyne_front"]
    initial_guess_main_rear = transforms_dict["velodyne_rear"]
    # Front-rear guess composed from the two transforms relative to main
    initial_guess_front_rear = np.linalg.inv(
        initial_guess_main_front) @ initial_guess_main_rear
    # Transform directly computed; only printed for comparison
    direct_initial_guess_front_rear = transforms_dict["direct_front_rear"]

    # Print out all the initial estimates as poses
    print("INITIAL GUESS MAIN FRONT: \n {} \n".format(
        initial_guess_main_front))
    print("INITIAL GUESS MAIN REAR: \n {} \n".format(initial_guess_main_rear))
    print("INITIAL GUESS FRONT REAR: \n {} \n".format(
        initial_guess_front_rear))
    print("INITIAL GUESS DIRECT FRONT REAR: \n {} \n".format(
        direct_initial_guess_front_rear))

    # Split the initial guesses into rotation block and translation column
    R0_main_front = initial_guess_main_front[:3, :3]
    t0_main_front = initial_guess_main_front[:3, 3]
    X0_main_front = (R0_main_front, t0_main_front)
    print("INITIAL GUESS MAIN FRONT: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_main_front, t0_main_front))

    R0_main_rear = initial_guess_main_rear[:3, :3]
    t0_main_rear = initial_guess_main_rear[:3, 3]
    X0_main_rear = (R0_main_rear, t0_main_rear)
    print("INITIAL GUESS MAIN REAR: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_main_rear, t0_main_rear))

    R0_front_rear = initial_guess_front_rear[:3, :3]
    t0_front_rear = initial_guess_front_rear[:3, 3]
    X0_front_rear = (R0_front_rear, t0_front_rear)
    print("INITIAL GUESS FRONT REAR: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_front_rear, t0_front_rear))

    # For every pair, omega / rho are the larger of the two odometries'
    # rotation / translation variances.

    ######################## MAIN FRONT CALIBRATION ###########################
    Xopt_main_front = _calibrate_sensor_pair(
        "main", "front",
        np.array(front_rel_poses),  # First set of poses
        np.array(main_rel_poses),  # Second set of poses
        main_reject, front_reject,
        np.max([var_t_main, var_t_front]),
        np.max([var_R_main, var_R_front]),
        manifold, X0_main_front, initial_guess_main_front)

    ######################## MAIN REAR CALIBRATION ############################
    Xopt_main_rear = _calibrate_sensor_pair(
        "main", "rear",
        np.array(rear_rel_poses),  # First set of poses
        np.array(main_rel_poses),  # Second set of poses
        main_reject, rear_reject,
        np.max([var_t_main, var_t_rear]),
        np.max([var_R_main, var_R_rear]),
        manifold, X0_main_rear, initial_guess_main_rear)

    ######################## FRONT REAR CALIBRATION ###########################
    Xopt_front_rear = _calibrate_sensor_pair(
        "front", "rear",
        np.array(rear_rel_poses),  # First set of poses
        np.array(front_rel_poses),  # Second set of poses
        front_reject, rear_reject,
        np.max([var_t_front, var_t_rear]),
        np.max([var_R_front, var_R_rear]),
        manifold, X0_front_rear, initial_guess_front_rear)

    # Display all results
    print("_________________________________________________________")
    print("_____________________ALL RESULTS_________________________")
    print("_________________________________________________________")
    print("Initial Guess for Main-Front Transformation: \n {}".format(
        initial_guess_main_front))
    print("Optimal solution between main and front reference frames: \n {}".
          format(Xopt_main_front))
    print("_________________________________________________________")
    print("Initial Guess for Main-Rear Transformation: \n {}".format(
        initial_guess_main_rear))
    print("Optimal solution between main and rear reference frames: \n {}".
          format(Xopt_main_rear))
    print("_________________________________________________________")
    print("Initial Guess for Front-Rear Transformation: \n {}".format(
        initial_guess_front_rear))
    print("Optimal solution between front and rear reference frames: \n {}".
          format(Xopt_front_rear))
    print("_________________________________________________________")
# Column reordering so that 'age' is no longer the first bernoulli variable
# (currently disabled; the snippet below is kept for reference only).
#***************************************************************************
'''
train[['age', 'workclass', 'fnlwgt', 'education.num', 'marital.status',
       'occupation', 'relationship', 'race', 'capital.gain', 'capital.loss',
       'hours.per.week', 'native.country', 'income', 'sex']]
var_distrib = np.array(['continuous', 'categorical', 'continuous',\
                        'ordinal', 'categorical', 'categorical', 'categorical',\
                        'categorical', 'ordinal', 'ordinal',\
                        'continuous', 'categorical', 'bernoulli', 'bernoulli'])
'''

# Number of variables described by var_distrib
# (var_distrib is assumed to be a 1-D numpy array -- TODO confirm).
p_new = var_distrib.shape[0]

# Boolean mask flagging the categorical and ordinal variables
is_categorical = var_distrib == 'categorical'
is_ordinal = var_distrib == 'ordinal'
cat_features = is_categorical | is_ordinal

#*****************************************************************
# Formatting the data
#*****************************************************************

# Label-encode every categorical column in place and keep a copy of each
# fitted encoder in le_dict, keyed by column name.
for col_idx, colname in enumerate(train.columns):
    if var_distrib[col_idx] != 'categorical':
        continue  # only categorical columns are converted here

    le = LabelEncoder()
    train[colname] = le.fit_transform(train[colname])  # numerical codes
    le_dict[colname] = deepcopy(le)

# Encode binary data
def fit(self, x=None, c=None, n=None, t=None, how='MLE', offset=False,
        zi=False, lfp=False, tl=None, tr=None, xl=None, xr=None, fixed=None,
        heuristic='Turnbull', init=[], rr='y', on_d_is_0=False,
        turnbull_estimator='Fleming-Harrington'):
    r"""
    The central feature to SurPyval's capability. This function aims to
    have an API that mimics the simplicity of the scipy API. That is, to use
    a simple :code:`fit()` call, with as many or as few parameters as is
    needed.

    Parameters
    ----------

    x : array like, optional
        Array of observations of the random variables. If x is
        :code:`None`, xl and xr must be provided.

    c : array like, optional
        Array of censoring flag. -1 is left censored, 0 is observed, 1 is
        right censored, and 2 is intervally censored. If not provided
        will assume all values are observed.

    n : array like, optional
        Array of counts for each x. If data is provided as counts, then
        this can be provided. If :code:`None` will assume each
        observation is 1.

    t : 2D-array like, optional
        2D array like of the left and right values at which the
        respective observation was truncated. If not provided it assumes
        that no truncation occurs.

    how : {'MLE', 'MPP', 'MOM', 'MSE', 'MPS'}, optional
        Method to estimate parameters, these are:

            - MLE : Maximum Likelihood Estimation
            - MPP : Method of Probability Plotting
            - MOM : Method of Moments
            - MSE : Mean Square Error
            - MPS : Maximum Product Spacing

    offset : boolean, optional
        If :code:`True` finds the shifted distribution. If not provided
        assumes not a shifted distribution. Only works with distributions
        that are supported on the half-real line.

    zi : boolean, optional
        If :code:`True` fits a zero-inflated model. Only available with
        MLE and with distributions whose support starts at 0.

    lfp : boolean, optional
        If :code:`True` fits a limited failure population model. Only
        available with MLE.

    tl : array like or scalar, optional
        Values of left truncation for observations. If it is a scalar
        value assumes each observation is left truncated at the value.
        If an array, it is the respective 'late entry' of the observation

    tr : array like or scalar, optional
        Values of right truncation for observations. If it is a scalar
        value assumes each observation is right truncated at the value.
        If an array, it is the respective right truncation value for each
        observation

    xl : array like, optional
        Array like of the left array for 2-dimensional input of x. This
        is useful for data that is all intervally censored. Must be used
        with the :code:`xr` input.

    xr : array like, optional
        Array like of the right array for 2-dimensional input of x. This
        is useful for data that is all intervally censored. Must be used
        with the :code:`xl` input.

    fixed : dict, optional
        Dictionary of parameters and their values to fix. Fixes parameter
        by name.

    heuristic : {"Blom", "Median", "ECDF", "Modal", "Midpoint", "Mean",
        "Weibull", "Benard", "Beard", "Hazen", "Gringorten", "None",
        "Tukey", "DPW", "Fleming-Harrington", "Kaplan-Meier",
        "Nelson-Aalen", "Filliben", "Larsen", "Turnbull"}
        Plotting method to use, if using the probability plotting, MPP,
        method.

    init : array like, optional
        initial guess of parameters. Useful if method is failing. An empty
        sequence (the default) or :code:`None` means "derive the initial
        guess from the data".

    rr : ('y', 'x')
        The dimension on which to minimise the spacing between the line
        and the observation. If 'y' the mean square error between the
        line and vertical distance to each point is minimised. If 'x' the
        mean square error between the line and horizontal distance to
        each point is minimised.

    on_d_is_0 : boolean, optional
        For the case when using MPP and the highest value is right
        censored, you can choose to include this value into the
        regression analysis or not. That is, if :code:`False`, all values
        where there are 0 deaths are excluded from the regression. If
        :code:`True` all values regardless of whether there is a death or
        not are included in the regression.

    turnbull_estimator : ('Nelson-Aalen', 'Kaplan-Meier', or
        'Fleming-Harrington'), str, optional
        If using the Turnbull heuristic, you can elect to use either the
        KM, NA, or FH estimator with the Turnbull estimates of r, and d.
        Defaults to FH.

    Returns
    -------

    model : Parametric
        A parametric model with the fitted parameters and methods for all
        functions of the distribution using the fitted parameters.

    Raises
    ------

    ValueError
        If the requested combination of distribution, method, censoring,
        truncation, offset, ``zi`` or ``lfp`` is not supported, or if
        observed values fall outside the support of the distribution.

    NotImplementedError
        If truncated data is used with ``how='MSE'``.

    Examples
    --------
    >>> from surpyval import Weibull
    >>> import numpy as np
    >>> x = Weibull.random(100, 10, 4)
    >>> model = Weibull.fit(x)
    >>> print(model)
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MLE
    Parameters          :
        alpha: 10.551521182640098
         beta: 3.792549834495306
    >>> Weibull.fit(x, how='MPS', fixed={'alpha' : 10})
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MPS
    Parameters          :
        alpha: 10.0
         beta: 3.4314657446866836
    >>> Weibull.fit(xl=x-1, xr=x+1, how='MPP')
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MPP
    Parameters          :
        alpha: 9.943092756713078
         beta: 8.613016934518258
    >>> c = np.zeros_like(x)
    >>> c[x > 13] = 1
    >>> x[x > 13] = 13
    >>> c = c[x > 6]
    >>> x = x[x > 6]
    >>> Weibull.fit(x=x, c=c, tl=6)
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MLE
    Parameters          :
        alpha: 10.363725328793413
         beta: 4.9886821457305865
    """
    # ---- Argument checks that do not need the data ----------------------
    if offset and self.name in [
            'Normal', 'Beta', 'Uniform', 'Gumbel', 'Logistic'
    ]:
        raise ValueError(
            '{dist} distribution cannot be offset'.format(dist=self.name))

    if how not in PARA_METHODS:
        raise ValueError('"how" must be one of: ' + str(PARA_METHODS))

    if how == 'MPP' and self.name == 'ExpoWeibull':
        raise ValueError(
            'ExpoWeibull distribution does not work with probability plot fitting'
        )

    if t is not None and how == 'MPS':
        raise ValueError(
            'Maximum product spacing doesn\'t yet support tuncation')

    if t is not None and how == 'MSE':
        raise NotImplementedError(
            'Mean square error doesn\'t yet support tuncation')

    if t is not None and how == 'MOM':
        # Fixed: this message used to blame "Maximum product spacing".
        raise ValueError('Method of moments doesn\'t support truncation')

    if (lfp or zi) and (how != 'MLE'):
        raise ValueError(
            'Limited failure or zero-inflated models can only be made with MLE'
        )

    if zi and (self.support[0] != 0):
        raise ValueError(
            "zero-inflated models can only work with models starting at 0")

    # ---- Normalise the inputs into the x, c, n, t representation --------
    x, c, n, t = surpyval.xcnt_handler(x=x, c=c, n=n, t=t, tl=tl, tr=tr,
                                       xl=xl, xr=xr)

    # ---- Checks that depend on the normalised data ----------------------
    if surpyval.utils.check_no_censoring(c) and (how == 'MOM'):
        raise ValueError('Method of moments doesn\'t support censoring')

    if (surpyval.utils.no_left_or_int(c)) and (how == 'MPP') and (
            not heuristic == 'Turnbull'):
        raise ValueError(
            'Probability plotting estimation with left or interval censoring only works with Turnbull heuristic'
        )

    # The Turnbull method is extremely memory intensive.
    # So if no left or interval censoring and no right-truncation
    # then this is equivalent.
    # NOTE(review): "no right-truncation" reads like every t[:, 1] should be
    # non-finite (.all()), yet the test below is .any() -- confirm intent.
    if ((heuristic == 'Turnbull')
            and (not ((-1 in c) or (2 in c)))
            and ((~np.isfinite(t[:, 1])).any())):
        heuristic = turnbull_estimator

    if (not offset) and (not zi):
        # For 2-D x the left column is the one compared with the support.
        lower = x[:, 0] if x.ndim == 2 else x
        if ((lower <= self.support[0]) & (c == 0)).any():
            raise ValueError(
                "Observed values must be in support of distribution; are some of your observed values 0, -Inf, or Inf?"
            )

    # Passed checks
    data = {'x': x, 'c': c, 'n': n, 't': t}

    model = para.Parametric(self, how, data, offset, lfp, zi)
    fitting_info = {}

    if how != 'MPP':
        transform, inv_trans, funcs, inv_f = bounds_convert(
            x, model.bounds)
        const, fixed_idx, not_fixed = fix_idx_and_function(
            fixed, model.param_map, funcs)

        fitting_info['transform'] = transform
        fitting_info['inv_trans'] = inv_trans
        fitting_info['funcs'] = funcs
        fitting_info['inv_f'] = inv_f

        fitting_info['const'] = const
        fitting_info['fixed_idx'] = fixed_idx
        fitting_info['not_fixed'] = not_fixed

        # Need a better general fitter to include offset
        # "No initial guess" is an empty sequence or None. The previous
        # `init == []` broke for array-like guesses, where `==` is an
        # elementwise (or shape-mismatched) comparison.
        if init is None or np.size(init) == 0:
            if self.name in ['Gumbel', 'Beta', 'Normal', 'Uniform']:
                init = np.array(self._parameter_initialiser(x, c, n))
            else:
                # Drop observed (c == 0) points lying on or outside the
                # support bounds before computing the initial guess.
                if x.ndim == 2:
                    init_mask = np.logical_or(x[:, 0] <= self.support[0],
                                              x[:, 0] >= self.support[1])
                    init_mask = ~np.logical_and(init_mask, c == 0)
                    xl_init = x[init_mask, 0]
                    xr_init = x[init_mask, 1]
                    x_init = np.vstack([xl_init, xr_init]).T
                else:
                    init_mask = np.logical_or(x <= self.support[0],
                                              x >= self.support[1])
                    init_mask = ~np.logical_and(init_mask, c == 0)
                    x_init = x[init_mask]
                c_init = c[init_mask]
                n_init = n[init_mask]
                init = np.array(
                    self._parameter_initialiser(x_init, c_init, n_init,
                                                offset=offset))
                if offset:
                    # Start the offset just below the smallest observation.
                    init[0] = x.min() - 1.

            if lfp:
                # The extra parameter is initialised from the largest
                # Nelson-Aalen plotting position.
                _, _, _, F = nonp.plotting_positions(
                    x, c, n, heuristic='Nelson-Aalen')
                max_F = np.max(F)
                if max_F > 0.5:
                    init = np.concatenate([init, [0.99]])
                else:
                    init = np.concatenate(
                        [init_from_bounds(self), [max_F]])

            if zi:
                # The extra parameter starts at the observed share of zeros.
                init = np.concatenate(
                    [init, [(n[x == 0]).sum() / n.sum()]])

        init = transform(init)
        init = init[not_fixed]
        fitting_info['init'] = init
    else:
        # Probability plotting method does not need an initial estimate
        fitting_info['rr'] = rr
        fitting_info['heuristic'] = heuristic
        fitting_info['on_d_is_0'] = on_d_is_0
        fitting_info['turnbull_estimator'] = turnbull_estimator
        fitting_info['init'] = None

    model.fitting_info = fitting_info

    # Run the chosen estimator and copy everything it returns onto the model.
    results = METHOD_FUNC_DICT[how](model)

    for k, v in results.items():
        setattr(model, k, v)

    # Expose each fitted parameter under its own name as well.
    if hasattr(model, 'params'):
        for k, v in zip(self.param_names, model.params):
            setattr(model, k, v)

    return model
def fit_weights_and_save( weights_file, ca_data_file='rs_vm_denoise_200605.npy', opto_silencing_data_file='vip_halo_data_for_sim.npy', opto_activation_data_file='vip_chrimson_data_for_sim.npy', constrain_wts=None, allow_var=True, fit_s02=True, constrain_isn=True, tv=False, l2_penalty=0.01, init_noise=0.1, init_W_from_lsq=False, init_W_from_lbfgs=False, scale_init_by=1, init_W_from_file=False, init_file=None, correct_Eta=False, init_Eta_with_s02=False, init_Eta12_with_dYY=False, use_opto_transforms=False, share_residuals=False, stimwise=False, simulate1=True, simulate2=False, help_constrain_isn=True, ignore_halo_vip=False, verbose=True, free_amplitude=False, norm_opto_transforms=False, zero_extra_weights=None, allow_s2=True): nsize, ncontrast = 6, 6 npfile = np.load(ca_data_file, allow_pickle=True)[( )] #,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True) rs = npfile['rs'] #rs_denoise = npfile['rs_denoise'] nsize, ncontrast, ndir = 6, 6, 8 #ori_dirs = [[0,4],[2,6]] #[[0,4],[1,3,5,7],[2,6]] ori_dirs = [[0, 1, 2, 3, 4, 5, 6, 7]] nT = len(ori_dirs) nS = len(rs[0]) def sum_to_1(r): R = r.reshape((r.shape[0], -1)) #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis] R = R / np.nansum(R, axis=1)[:, np.newaxis] # changed 8/28 return R def norm_to_mean(r): R = r.reshape((r.shape[0], -1)) R = R / np.nanmean(R[:, ~np.isnan(R.sum(0))], axis=1)[:, np.newaxis] return R Rs = [[None, None] for i in range(len(rs))] Rso = [[[None for iT in range(nT)] for iS in range(nS)] for icelltype in range(len(rs))] rso = [[[None for iT in range(nT)] for iS in range(nS)] for icelltype in range(len(rs))] for iR, r in enumerate(rs): #rs_denoise): #print(iR) for ialign in range(nS): #Rs[iR][ialign] = r[ialign][:,:nsize,:] #sm = np.nanmean(np.nansum(np.nansum(Rs[iR][ialign],1),1)) #Rs[iR][ialign] = Rs[iR][ialign]/sm #print('frac isnan Rs %d,%d: %f'%(iR,ialign,np.isnan(r[ialign]).mean())) Rs[iR][ialign] = sum_to_1(r[ialign][:, :nsize, :]) # Rs[iR][ialign] = 
von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))) kernel = np.ones((1, 2, 2)) kernel = kernel / kernel.sum() for iR, r in enumerate(rs): for ialign in range(nS): for iori in range(nT): #print('this Rs shape: '+str(Rs[iR][ialign].shape)) #print('this Rs reshaped shape: '+str(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]].shape)) #print('this Rs max percent nan: '+str(np.isnan(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]]).mean(-1).max())) Rso[iR][ialign][iori] = np.nanmean( Rs[iR][ialign].reshape( (-1, nsize, ncontrast, ndir))[:, :, :, ori_dirs[iori]], -1) Rso[iR][ialign][iori][:, :, 0] = np.nanmean( Rso[iR][ialign][iori][:, :, 0], 1)[:, np.newaxis] # average 0 contrast values #print('frac isnan pre-conv Rso %d,%d,%d: %f'%(iR,ialign,iori,np.isnan(Rso[iR][ialign][iori]).mean())) Rso[iR][ialign][iori][:, 1:, 1:] = ssi.convolve( Rso[iR][ialign][iori], kernel, 'valid') Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape( Rso[iR][ialign][iori].shape[0], -1) #print('frac isnan Rso %d,%d,%d: %f'%(iR,ialign,iori,np.isnan(Rso[iR][ialign][iori]).mean())) #print('sum of Rso isnan: '+str(np.isnan(Rso[iR][ialign][iori]).sum(1))) #Rso[iR][ialign][iori] = Rso[iR][ialign][iori]/np.nanmean(Rso[iR][ialign][iori],-1)[:,np.newaxis] def set_bound(bd, code, val=0): # set bounds to 0 where 0s occur in 'code' for iitem in range(len(bd)): bd[iitem][code[iitem]] = val nN = 36 nS = 2 nP = 2 nT = 1 nQ = 4 # code for bounds: 0 , constrained to 0 # +/-1 , constrained to +/-1 # 1.5, constrained to [0,1] # 2 , constrained to [0,inf) # -2 , constrained to (-inf,0] # 3 , unconstrained Wmx_bounds = 3 * np.ones((nP, nQ), dtype=int) Wmx_bounds[0, :] = 2 # L4 PCs are excitatory Wmx_bounds[0, 1] = 0 # SSTs don't receive L4 input if allow_var: Wsx_bounds = 3 * np.ones( Wmx_bounds.shape) #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds) Wsx_bounds[0, 1] = 0 else: Wsx_bounds = np.zeros( Wmx_bounds.shape) #Wmx_bounds.copy()*0 
#np.zeros_like(Wmx_bounds) Wmy_bounds = 3 * np.ones((nQ, nQ), dtype=int) Wmy_bounds[0, :] = 2 # PCs are excitatory Wmy_bounds[1:, :] = -2 # all the cell types except PCs are inhibitory Wmy_bounds[1, 1] = 0 # SSTs don't inhibit themselves # Wmy_bounds[3,1] = 0 # PVs are allowed to inhibit SSTs, consistent with Hillel's unpublished results, but not consistent with Pfeffer et al. Wmy_bounds[ 2, 0] = 0 # VIPs don't inhibit L2/3 PCs. According to Pfeffer et al., only L5 PCs were found to get VIP inhibition if not zero_extra_weights is None: Wmx_bounds[zero_extra_weights[0]] = 0 Wmy_bounds[zero_extra_weights[1]] = 0 if allow_var: Wsy_bounds = 3 * np.ones( Wmy_bounds.shape) #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds) Wsy_bounds[1, 1] = 0 Wsy_bounds[3, 1] = 0 Wsy_bounds[2, 0] = 0 else: Wsy_bounds = np.zeros( Wmy_bounds.shape) #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds) if not constrain_wts is None: for wt in constrain_wts: Wmy_bounds[wt[0], wt[1]] = 0 Wsy_bounds[wt[0], wt[1]] = 0 def tile_nS_nT_nN(kernel): row = np.concatenate([kernel for idim in range(nS * nT)], axis=0)[np.newaxis, :] tiled = np.concatenate([row for irow in range(nN)], axis=0) return tiled def set_bounds_by_code(lb, ub, bdlist): set_bound(lb, [bd == 0 for bd in bdlist], val=0) set_bound(ub, [bd == 0 for bd in bdlist], val=0) set_bound(lb, [bd == 2 for bd in bdlist], val=0) set_bound(ub, [bd == -2 for bd in bdlist], val=0) set_bound(lb, [bd == 1 for bd in bdlist], val=1) set_bound(ub, [bd == 1 for bd in bdlist], val=1) set_bound(lb, [bd == 1.5 for bd in bdlist], val=0) set_bound(ub, [bd == 1.5 for bd in bdlist], val=1) set_bound(lb, [bd == -1 for bd in bdlist], val=-1) set_bound(ub, [bd == -1 for bd in bdlist], val=-1) if allow_s2: if fit_s02: s02_bounds = 2 * np.ones( (nQ, )) # permitting noise as a free parameter else: s02_bounds = np.ones((nQ, )) else: s02_bounds = np.zeros((nQ, )) k_bounds = 1.5 * np.ones((nQ * (nS - 1), )) #k_bounds[1] = 0 # temporary: spatial kernel constrained to 0 for SST 
#k_bounds[2] = 0 # temporary: spatial kernel constrained to 0 for VIP kappa_bounds = np.ones((1, )) # kappa_bounds = 2*np.ones((1,)) T_bounds = 1.5 * np.ones((nQ * (nT - 1), )) X_bounds = tile_nS_nT_nN(np.array([2, 1])) # X_bounds = np.array([np.array([2,1,2,1])]*nN) Xp_bounds = tile_nS_nT_nN(np.array([3, 1])) # Xp_bounds = np.array([np.array([3,1,3,1])]*nN) # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,))) # # Y_bounds = 2*np.ones((nN,nT*nS*nQ)) Eta_bounds = tile_nS_nT_nN(3 * np.ones((nQ, ))) # Eta_bounds = 3*np.ones((nN,nT*nS*nQ)) if allow_s2: if allow_var: Xi_bounds = tile_nS_nT_nN(3 * np.ones((nQ, ))) else: Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, ))) else: Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, ))) # Xi_bounds = 3*np.ones((nN,nT*nS*nQ)) h1_bounds = -2 * np.ones((1, )) h2_bounds = 2 * np.ones((1, )) bl_bounds = 3 * np.ones((nQ, )) if free_amplitude: amp_bounds = 2 * np.ones((nT * nS * nQ, )) else: amp_bounds = 1 * np.ones((nT * nS * nQ, )) # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)] shapes1 = [(nP, nQ), (nQ, nQ), (nP, nQ), (nQ, nQ), (nQ, ), (nQ * (nS - 1), ), (1, ), (nQ * (nT - 1), ), (1, ), (1, ), (nQ, ), (nQ * nS * nT, )] shapes2 = [(nN, nT * nS * nP), (nN, nT * nS * nP), (nN, nT * nS * nQ), (nN, nT * nS * nQ)] #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1]))) #print('size of shapes2: '+str(np.sum([np.prod(shp) for shp in shapes2]))) # Wmx, Wmy, Wsx, Wsy, s02, k, kappa,T, h1, h2 #XX, XXp, Eta, Xi #bdlist = [Wmx_bounds,Wmy_bounds,Wsx_bounds,Wsy_bounds,s02_bounds,k_bounds,kappa_bounds,T_bounds,X_bounds,Xp_bounds,Eta_bounds,Xi_bounds,h1_bounds,h2_bounds] bd1list = [ Wmx_bounds, Wmy_bounds, Wsx_bounds, Wsy_bounds, s02_bounds, k_bounds, kappa_bounds, T_bounds, h1_bounds, h2_bounds, bl_bounds, amp_bounds ] bd2list = [X_bounds, Xp_bounds, Eta_bounds, Xi_bounds] lb1, ub1 = [[sgn * np.inf * np.ones(shp) for shp in shapes1] for sgn in [-1, 1]] set_bounds_by_code(lb1, ub1, bd1list) lb2, 
ub2 = [[sgn * np.inf * np.ones(shp) for shp in shapes2] for sgn in [-1, 1]] set_bounds_by_code(lb2, ub2, bd2list) #set_bound(lb,[bd==0 for bd in bdlist],val=0) #set_bound(ub,[bd==0 for bd in bdlist],val=0) # #set_bound(lb,[bd==2 for bd in bdlist],val=0) # #set_bound(ub,[bd==-2 for bd in bdlist],val=0) # #set_bound(lb,[bd==1 for bd in bdlist],val=1) #set_bound(ub,[bd==1 for bd in bdlist],val=1) # #set_bound(lb,[bd==1.5 for bd in bdlist],val=0) #set_bound(ub,[bd==1.5 for bd in bdlist],val=1) # #set_bound(lb,[bd==-1 for bd in bdlist],val=-1) #set_bound(ub,[bd==-1 for bd in bdlist],val=-1) # for bd in [lb,ub]: # for ind in [2,3]: # bd[ind][:,1] = 0 # temporary for no variation expt. # lb[2] = np.zeros_like(lb[2]) # lb[3] = np.zeros_like(lb[3]) # lb[4] = np.ones_like(lb[4]) # lb[5] = np.zeros_like(lb[5]) # ub[2] = np.zeros_like(ub[2]) # ub[3] = np.zeros_like(ub[3]) # ub[4] = np.ones_like(ub[4]) # ub[5] = np.ones_like(ub[5]) # temporary for no variation expt. lb1 = np.concatenate([a.flatten() for a in lb1]) ub1 = np.concatenate([b.flatten() for b in ub1]) lb2 = np.concatenate([a.flatten() for a in lb2]) ub2 = np.concatenate([b.flatten() for b in ub2]) bounds1 = [(a, b) for a, b in zip(lb1, ub1)] bounds2 = [(a, b) for a, b in zip(lb2, ub2)] nS = 2 #print('nT: '+str(nT)) ndims = 5 ncelltypes = 5 Yhat = [[None for iT in range(nT)] for iS in range(nS)] Xhat = [[None for iT in range(nT)] for iS in range(nS)] Ypc_list = [[None for iT in range(nT)] for iS in range(nS)] Xpc_list = [[None for iT in range(nT)] for iS in range(nS)] mx = [None for iS in range(nS)] for iS in range(nS): mx[iS] = np.zeros((ncelltypes, )) yy = [None for icelltype in range(ncelltypes)] for icelltype in range(ncelltypes): yy[icelltype] = np.nanmean(Rso[icelltype][iS][0], 0) mx[iS][icelltype] = np.nanmax(yy[icelltype]) for iT in range(nT): y = [ np.nanmean(Rso[icelltype][iS][iT], axis=0)[:, np.newaxis] / mx[iS][icelltype] for icelltype in range(1, ncelltypes) ] Ypc_list[iS][iT] = [None for icelltype in 
range(1, ncelltypes)] for icelltype in range(1, ncelltypes): # as currently written, penalties involving (X,Y)pc_list are effectively artificially smaller by # a factor of mx[iS][icelltype] compared to what one would expect from the (X,Y)-penalty as defined # subsequently. rss = Rso[icelltype][iS][iT].copy( ) #/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1) #print('sum of isnan: '+str(np.isnan(rss).sum(1))) #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1) rss = rss[np.isnan(rss).sum(1) == 0] # print(rss.max()) # rss[rss<0] = 0 # rss = rss[np.random.randn(rss.shape[0])>0] try: u, s, v = np.linalg.svd(rss - np.mean(rss, 0)[np.newaxis]) Ypc_list[iS][iT][icelltype - 1] = [ (s[idim], v[idim]) for idim in range(ndims) ] # print('yep on Y') # print(np.min(np.sum(rs[icelltype][iS][iT],axis=1))) except: print('nope on Y') #print('shape of rss: '+str(rss.shape)) #print('mean of rss: '+str(np.mean(np.isnan(rss)))) #print('min of this rs: '+str(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))) Yhat[iS][iT] = np.concatenate(y, axis=1) # x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis] icelltype = 0 #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype] x = np.nanmean(Rso[icelltype][iS][iT], 0)[:, np.newaxis] / mx[iS][icelltype] # opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis] Xhat[iS][iT] = np.concatenate((x, np.ones_like(x)), axis=1) # Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1) icelltype = 0 #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype] rss = Rso[icelltype][iS][iT].copy() rss = rss[np.isnan(rss).sum(1) == 0] # try: u, s, v = np.linalg.svd(rss - rss.mean(0)[np.newaxis]) Xpc_list[iS][iT] = [None for iinput in range(2)] Xpc_list[iS][iT][0] = [(s[idim], v[idim]) for idim in range(ndims)] Xpc_list[iS][iT][1] = [(0, np.zeros((Xhat[0][0].shape[0], ))) for idim in range(ndims)] # except: # print('nope on X') # 
print(np.mean(np.isnan(rss))) # print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1))) nN, nP = Xhat[0][0].shape #print('nP: '+str(nP)) nQ = Yhat[0][0].shape[1] import sim_utils pop_rate_fn = sim_utils.f_miller_troyer pop_deriv_fn = sim_utils.fprime_miller_troyer def compute_f_(Eta, Xi, s02): return sim_utils.f_miller_troyer( Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)])) def compute_fprime_m_(Eta, Xi, s02): return sim_utils.fprime_miller_troyer( Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)])) * Xi def compute_fprime_s_(Eta, Xi, s02): s2 = Xi**2 + np.concatenate((s02, s02), axis=0) return sim_utils.fprime_s_miller_troyer(Eta, s2) * (Xi / s2) def sorted_r_eigs(w): drW, prW = np.linalg.eig(w) srtinds = np.argsort(drW) return drW[srtinds], prW[:, srtinds] # 0.Wmx, 1.Wmy, 2.Wsx, 3.Wsy, 4.s02,5.K, 6.kappa,7.T,8.XX, 9.XXp, 10.Eta, 11.Xi, 12.h1, 13.h2 shapes1 = [(nP, nQ), (nQ, nQ), (nP, nQ), (nQ, nQ), (nQ, ), (nQ * (nS - 1), ), (1, ), (nQ * (nT - 1), ), (1, ), (1, ), (nQ, ), (nT * nS * nQ, )] shapes2 = [(nN, nT * nS * nP), (nN, nT * nS * nP), (nN, nT * nS * nQ), (nN, nT * nS * nQ)] #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1]))) #print('size of shapes2: '+str(np.sum([np.prod(shp) for shp in shapes2]))) import calnet.fitting_spatial_feature YYhat = calnet.utils.flatten_nested_list_of_2d_arrays(Yhat) XXhat = calnet.utils.flatten_nested_list_of_2d_arrays(Xhat) opto_dict = np.load(opto_silencing_data_file, allow_pickle=True)[()] Yhat_opto = opto_dict['Yhat_opto'] Yhat_opto = np.nanmean(np.reshape(Yhat_opto, (nN, 2, nS, 2, nQ)), 3).reshape((nN * 2, -1)) Yhat_opto[0::12] = np.nanmean(Yhat_opto[0::12], axis=0)[np.newaxis] Yhat_opto[1::12] = np.nanmean(Yhat_opto[1::12], axis=0)[np.newaxis] Yhat_opto = Yhat_opto / np.nanmax(Yhat_opto[0::2], 0)[np.newaxis, :] #print(Yhat_opto.shape) h_opto = opto_dict['h_opto'] #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2] YYhat_halo = Yhat_opto.reshape((nN, 2, -1)) opto_transform1 = 
calnet.utils.fit_opto_transform( YYhat_halo, norm01=norm_opto_transforms) opto_transform1.res[:, [0, 2, 3, 4, 6, 7]] = 0 dYY1 = opto_transform1.transform(YYhat) - opto_transform1.preprocess(YYhat) #YYhat_halo_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_halo) #dYY1 = YYhat_halo_sim[:,1,:] - YYhat_halo_sim[:,0,:] def overwrite_plus_n(arr, to_overwrite, n): arr[:, to_overwrite] = arr[:, int(to_overwrite + n)] return arr for to_overwrite in [1, 2]: n = 4 dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \ = [overwrite_plus_n(x,to_overwrite,n) for x in \ [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]] for to_overwrite in [7]: n = -4 dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \ = [overwrite_plus_n(x,to_overwrite,n) for x in \ [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]] if ignore_halo_vip: dYY1[:, 2::nQ] = np.nan #for to_overwrite in [1,2]: # dYY1[:,to_overwrite] = dYY1[:,to_overwrite+4] #for to_overwrite in [7]: # dYY1[:,to_overwrite] = dYY1[:,to_overwrite-4] #Yhat_opto = opto_dict['Yhat_opto'] #for iS in range(nS): # mx = np.zeros((nQ,)) # for iQ in range(nQ): # slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ) # mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer]) # Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ] ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:] #print(Yhat_opto.shape) #h_opto = opto_dict['h_opto'] #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2] #for to_overwrite in [1,2,5,6]: # overwrite sst and vip with off-centered values # dYY1[:,to_overwrite] = dYY1[:,to_overwrite+8] #for to_overwrite in [11,15]: # dYY1[:,to_overwrite] = np.nan #dYY1[:,to_overwrite-8] opto_dict = np.load(opto_activation_data_file, allow_pickle=True)[()] Yhat_opto = opto_dict['Yhat_opto'] Yhat_opto = np.nanmean(np.reshape(Yhat_opto, (nN, 2, nS, 2, nQ)), 3).reshape((nN * 2, -1)) Yhat_opto[0::12] = np.nanmean(Yhat_opto[0::12], axis=0)[np.newaxis] Yhat_opto[1::12] = 
np.nanmean(Yhat_opto[1::12], axis=0)[np.newaxis] Yhat_opto = Yhat_opto / Yhat_opto[0::2].max(0)[np.newaxis, :] #print(Yhat_opto.shape) h_opto = opto_dict['h_opto'] #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2] YYhat_chrimson = Yhat_opto.reshape((nN, 2, -1)) opto_transform2 = calnet.utils.fit_opto_transform( YYhat_chrimson, norm01=norm_opto_transforms) dYY2 = opto_transform2.transform(YYhat) - opto_transform2.preprocess(YYhat) #YYhat_chrimson_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_chrimson) #dYY2 = YYhat_chrimson_sim[:,1,:] - YYhat_chrimson_sim[:,0,:] #Yhat_opto = opto_dict['Yhat_opto'] #for iS in range(nS): # mx = np.zeros((nQ,)) # for iQ in range(nQ): # slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ) # mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer]) # Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ] ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:] #print(Yhat_opto.shape) #h_opto = opto_dict['h_opto'] #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2] #print('dYY1 mean: %03f'%np.nanmean(np.abs(dYY1))) #print('dYY2 mean: %03f'%np.nanmean(np.abs(dYY2))) dYY = np.concatenate((dYY1, dYY2), axis=0) #titles = ['VIP silencing','VIP activation'] #for itype in [0,1,2,3]: # plt.figure(figsize=(5,2.5)) # for iyy,dyy in enumerate([dYY1,dYY2]): # plt.subplot(1,2,iyy+1) # if np.sum(np.isnan(dyy[:,itype]))==0: # sca.scatter_size_contrast(YYhat[:,itype],YYhat[:,itype]+dyy[:,itype],nsize=6,ncontrast=6)#,mn=0) # plt.title(titles[iyy]) # plt.xlabel('cell type %d event rate, \n light off'%itype) # plt.ylabel('cell type %d event rate, \n light on'%itype) # ut.erase_top_right() # plt.tight_layout() # ut.mkdir('figures') # plt.savefig('figures/scatter_light_on_light_off_target_celltype_%d.eps'%itype) opto_mask = ~np.isnan(dYY) #dYY[nN:][~opto_mask[nN:]] = -dYY[:nN][~opto_mask[nN:]] #print('mean of opto_mask: '+str(opto_mask.mean())) #dYY[~opto_mask] = 0 def zero_nans(arr): arr[np.isnan(arr)] = 0 return arr #dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\ # 
opto_transform2.slope,opto_transform2.intercept,opto_transform2.res\ # = [zero_nans(x) for x in \ # [dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\ # opto_transform2.slope,opto_transform2.intercept,opto_transform2.res]] dYY = zero_nans(dYY) to_adjust = np.logical_or(np.isnan(opto_transform2.slope[0]), np.isnan(opto_transform2.intercept[0])) opto_transform2.slope[:, to_adjust] = 1 / opto_transform1.slope[:, to_adjust] opto_transform2.intercept[:, to_adjust] = -opto_transform1.intercept[:, to_adjust] / opto_transform1.slope[:, to_adjust] opto_transform2.res[:, to_adjust] = -opto_transform1.res[:, to_adjust] / opto_transform1.slope[:, to_adjust] #np.save('/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy',dYY) from importlib import reload reload(calnet) #reload(calnet.fitting_2step_spatial_feature_opto_tight_nonlinear) reload(sim_utils) # reload(calnet.fitting_spatial_feature) # W0list = [np.ones(shp) for shp in shapes] wt_dict = {} wt_dict['X'] = 3 #1 wt_dict['Y'] = 3 #wt_dict['Eta'] = 3 # 1 # wt_dict['Xi'] = 0.1 wt_dict['stims'] = np.ones((nN, 1)) #(np.arange(30)/30)[:,np.newaxis]**1 # wt_dict['barrier'] = 0. 
#30.0 #0.1 wt_dict['opto'] = 1 #1e1 wt_dict['isn'] = 0.3 wt_dict['tv'] = 1 spont_frac = 0.5 pc_frac = 0.5 wt_dict['stimsOpto'] = (1 - spont_frac) * 6 / 5 * np.ones((nN, 1)) wt_dict['stimsOpto'][0::6] = spont_frac * 6 wt_dict['celltypesOpto'] = (1 - pc_frac) * 4 / 3 * np.ones( (1, nQ * nS * nT)) wt_dict['celltypesOpto'][0, 0::nQ] = pc_frac * 4 wt_dict['dirOpto'] = np.array((1, 0.3)) wt_dict['dYY'] = 10 #10 wt_dict['coupling'] = 1e-3 wt_dict['smi'] = 0.1 wt_dict['smi_halo'] = 30 wt_dict['smi_chrimson'] = 0.1 ##temporary no_opto wt_dict['opto'] = 0 wt_dict['dirOpto'] = np.array((1, 1)) #wt_dict['stimsOpto'] = np.ones((nN,1)) wt_dict['celltypesOpto'] = np.ones((1, nQ * nS * nT)) wt_dict['smi'] = 0 #0.01 # 0 wt_dict['smi_halo'] = 0 #1 # 0 wt_dict['smi_chrimson'] = 0 #0.01 # 0 wt_dict['isn'] = 0.1 wt_dict['tv'] = 0.1 wt_dict['X'] = 3 wt_dict['Eta'] = 10 #3 # 1 # ## temporary opto from no_opto #wt_dict['opto'] = 0.01 #wt_dict['tv'] = 0.3#0.1 np.save( 'XXYYhat.npy', { 'YYhat': YYhat, 'XXhat': XXhat, 'rs': rs, 'Rs': Rs, 'Rso': Rso, 'Ypc_list': Ypc_list, 'Xpc_list': Xpc_list }) if allow_s2: Eta0 = invert_f_mt(YYhat) else: Eta0 = invert_f_mt(YYhat, s02=0) # Wmx, Wmy, Wsx, Wsy, s02, k, kappa,T, h1, h2 #XX, XXp, Eta, Xi opt = fmc.gen_opt(nS=nS, nT=nT) opt['allow_s02'] = False opt['allow_A'] = False opt['allow_B'] = True ntries = 1 nhyper = 1 dt = 1e-1 niter = int(np.round(10 / dt)) #int(1e4) perturbation_size = 5e-2 # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5 #l2_penalty = 0.1 W1t = [[None for itry in range(ntries)] for ihyper in range(nhyper)] W2t = [[None for itry in range(ntries)] for ihyper in range(nhyper)] loss = np.zeros((nhyper, ntries)) is_neg = np.array([b[1] for b in bounds1]) == 0 counter = 0 negatize = [np.zeros(shp, dtype='bool') for shp in shapes1] #print(shapes1) for ishp, shp in enumerate(shapes1): nel = np.prod(shp) negatize[ishp][:][is_neg[counter:counter + nel].reshape(shp)] = True counter = counter + nel for ihyper in range(nhyper): 
for itry in range(ntries): #print((ihyper,itry)) #[0.(nP,nQ),1.(nQ,nQ),2.(nP,nQ),3.(nQ,nQ),4.(nQ,),5.(nQ*(nS-1),),6.(1,),7.(nQ*(nT-1),),8.(1,),9.(1,),10.(nQ,),11.(nQ*nS*nT,)] W10list = [ init_noise * (ihyper + 1) * np.random.rand(*shp) for shp in shapes1 ] W20list = [ init_noise * (ihyper + 1) * np.random.rand(*shp) for shp in shapes2 ] #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1]))) #print('size of w10: '+str(np.sum([np.size(x) for x in W10list]))) #print('len(W10list) : '+str(len(W10list))) counter = 0 for ishp, shp in enumerate(shapes1): W10list[ishp][negatize[ishp]] = -W10list[ishp][negatize[ishp]] W10list[4] = np.ones(shapes1[4]) # s02 W10list[5] = np.ones(shapes1[5]) # K W10list[6] = np.ones(shapes1[6]) # kappa W10list[7] = np.ones(shapes1[7]) # T W10list[8] = np.zeros(shapes1[8]) # h1 W10list[9] = np.zeros(shapes1[9]) # h2 W10list[10] = np.zeros(shapes1[10]) # baseline W10list[11] = np.ones(shapes1[11]) # amplitude W20list[0] = np.concatenate(Xhat, axis=1) #XX W20list[1] = np.zeros_like(W20list[1]) #XXp W20list[2] = Eta0.copy() #np.zeros(shapes[10]) #Eta W20list[3] = np.zeros(shapes2[3]) #Xi #[Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi] if init_W_from_lsq: W10list[0], W10list[1] = initialize_W(Xhat, Yhat, scale_by=scale_init_by, allow_s2=allow_s2) for ivar in range(0, 2): W10list[ ivar] = W10list[ivar] + init_noise * np.random.randn( *W10list[ivar].shape) if init_W_from_lbfgs: print(opt) opt_param, result, _, _, _, _, _, _, _, _, _, _, _ = fmc.initialize_params( XXhat, YYhat, opt, wpcpc=5, wpvpv=-6) these_shapes = [(nP, nQ), (nQ, nQ), (nQ, ), (nQ, ), (nQ, ), (nQ, )] Wmx0, Wmy0, K0, s020, amplitude0, baseline0 = calnet.utils.parse_thing( opt_param, these_shapes) if init_Eta_with_s02: #assert(True==False) Eta0 = invert_f_mt_with_s02(YYhat - np.tile(baseline0, nS * nT), s020, nS=nS, nT=nT) W20list[2] = Eta0.copy() #Wmx0 = opt_param[:nP] #Wmy0 = opt_param[nP:nP+nQ] #K0 = opt_param[nP+nQ] #s020 = opt_param[nP+nQ+1] #amplitude0 = 
opt_param[nP+nQ+2] #baseline0 = opt_param[nP+nQ+3] print((Wmx0, Wmy0, K0, s020, np.tile(amplitude0, 2), baseline0)) W10list[0], W10list[1], W10list[5], W10list[4], W10list[ -1], W10list[-2] = Wmx0, Wmy0, K0, s020, np.tile( amplitude0, 2), baseline0 for ivar in range(0, 2): W10list[ ivar] = W10list[ivar] + init_noise * np.random.randn( *W10list[ivar].shape) elif constrain_isn: W10list[1][0, 0] = 3 if help_constrain_isn: W10list[1][0, 3] = 5 W10list[1][3, 0] = -5 W10list[1][3, 3] = -5 else: W10list[1][0, 1:4] = 5 W10list[1][1:4, 0] = -5 if init_W_from_file: npyfile = np.load(init_file, allow_pickle=True)[()] #Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,h1,h2,bl,amp = parse_W1(W1) #XX,XXp,Eta,Xi = parse_W2(W2) #Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,bl,amp = parse_W1(W1) W10list = [ npyfile['as_list'][ivar] for ivar in [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15] ] W20list = [npyfile['as_list'][ivar] for ivar in [8, 9, 10, 11]] if W20list[0].size == nN * nS * 2 * nP: #assert(True==False) W10list[7] = np.array(()) W10list[1][1, 0] = W10list[1][1, 0] W20list[0] = np.nanmean( W20list[0].reshape((nN, nS, 2, nP)), 2).flatten() #XX W20list[1] = np.nanmean( W20list[1].reshape((nN, nS, 2, nP)), 2).flatten() #XXp W20list[2] = np.nanmean( W20list[2].reshape((nN, nS, 2, nQ)), 2).flatten() #Eta W20list[3] = np.nanmean( W20list[3].reshape((nN, nS, 2, nQ)), 2).flatten() #Xi if correct_Eta: #assert(True==False) W20list[2] = Eta0.copy() if len(W10list) < len(shapes1): #assert(True==False) W10list = W10list + [ np.array(1), np.zeros((nQ, )), np.zeros((nT * nS * nQ, )) ] # add h2, bl, amp if init_Eta_with_s02: #assert(True==False) s02 = W10list[4].copy() Eta0 = invert_f_mt_with_s02(YYhat, s02, nS=nS, nT=nT) W20list[2] = Eta0.copy() #if init_Eta12_with_dYY: # Eta0 = W20list[2].copy().reshape((nN,nQ*nS*nT)) # Xi0 = W20list[3].copy().reshape((nN,nQ*nS*nT)) # s020 = W10list[4].copy() # YY0s = compute_f_(Eta0,Xi0,s020) #titles = ['VIP silencing','VIP activation'] #for itype in [0,1,2,3]: # 
plt.figure(figsize=(5,2.5)) # for iyy,yy in enumerate([YY10s,YY20s]): # plt.subplot(1,2,iyy+1) # if np.sum(np.isnan(yy[:,itype]))==0: # sca.scatter_size_contrast(YY0s[:,itype],yy[:,itype],nsize=6,ncontrast=6)#,mn=0) # plt.title(titles[iyy]) # plt.xlabel('cell type %d event rate, \n light off'%itype) # plt.ylabel('cell type %d event rate, \n light on'%itype) # ut.erase_top_right() # plt.tight_layout() # ut.mkdir('figures') # plt.savefig('figures/scatter_light_on_light_off_init_celltype_%d.eps'%itype) for ivar in [0, 1, 4, 5]: # Wmx, Wmy, s02, k print(init_noise) W10list[ ivar] = W10list[ivar] + init_noise * np.random.randn( *W10list[ivar].shape) #print('size of bounds1: '+str(np.sum([np.size(x) for x in bd1list]))) #print('size of w10: '+str(np.sum([np.size(x) for x in W10list]))) #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1]))) W1t[ihyper][itry], W2t[ihyper][itry], loss[ihyper][ itry], gr, hess, result = calnet.fitting_2step_spatial_feature_opto_tight_nonlinear_baseline.fit_W_sim( Xhat, Xpc_list, Yhat, Ypc_list, pop_rate_fn=pop_rate_fn, pop_deriv_fn=pop_deriv_fn, W10list=W10list.copy(), W20list=W20list.copy(), bounds1=bounds1, bounds2=bounds2, niter=niter, wt_dict=wt_dict, l2_penalty=l2_penalty, compute_hessian=False, dt=dt, perturbation_size=perturbation_size, dYY=dYY, constrain_isn=constrain_isn, tv=tv, opto_mask=opto_mask, use_opto_transforms=use_opto_transforms, opto_transform1=opto_transform1, opto_transform2=opto_transform2, share_residuals=share_residuals, stimwise=stimwise, simulate1=simulate1, simulate2=simulate2, verbose=verbose) #def parse_W(W): # Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = W # return Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 def parse_W1(W): Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp = W return Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp def parse_W2(W): XX, XXp, Eta, Xi = W return XX, XXp, Eta, Xi itry = 0 Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp = 
parse_W1(W1t[0][0]) XX, XXp, Eta, Xi = parse_W2(W2t[0][0]) labels1 = [ 'Wmx', 'Wmy', 'Wsx', 'Wsy', 's02', 'K', 'kappa', 'T', 'h1', 'h2', 'bl', 'amp' ] labels2 = ['XX', 'XXp', 'Eta', 'Xi'] Wstar_dict = {} for i, label in enumerate(labels1): Wstar_dict[label] = W1t[0][0][i] for i, label in enumerate(labels2): Wstar_dict[label] = W2t[0][0][i] Wstar_dict['as_list'] = [ Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, bl, amp ] Wstar_dict['loss'] = loss[0][0] Wstar_dict['wt_dict'] = wt_dict np.save(weights_file, Wstar_dict, allow_pickle=True)
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,
           nan_mask, target_nb_pseudo_obs=500, it=50,
           eps=1E-05, maxstep=100, seed=None, perform_selec=True,
           dm=None, max_patience=1):
    ''' Fit a M1DGMM on the complete observations and use it to impute the
    missing values of the incomplete ones

    y (numobs x p ndarray or DataFrame): The observations containing mixed variables.
        When y exposes a ``columns`` attribute, those labels are forwarded to
        vars_contributions; otherwise pandas default labels are used
    n_clusters (int): The number of clusters to look for in the data.
        Not used in the body (M1DGMM is called with 'auto')
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
        (only one layer is supported: len(k) must be < 2)
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    nan_mask (ndarray): A mask array equal to True when the observation value is missing False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate.
        Not used in the body
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops.
        Also used as the tolerance of the per-observation optimisation
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given
        (None) an empty list is forwarded to M1DGMM, as before
    max_patience (int): Forwarded unchanged to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The M1DGMM output, augmented with
        'completed_y': y with its masked entries replaced by the imputed values
        'zz': the optimised latent point of each incomplete observation
        'fun': the final objective value of each of those optimisations
    '''

    # Keep the column labels when y is a DataFrame. A plain ndarray has no
    # ``columns``; None lets pandas fall back to its default labels instead
    # of raising AttributeError.
    cols = getattr(y, 'columns', None)

    # Fresh list per call rather than a mutable default shared across calls
    if dm is None:
        dm = []

    # Formatting
    if not isinstance(nan_mask, np.ndarray):
        nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray):
        y = np.asarray(y)

    assert len(k) < 2, 'Not implemented for deeper MDGMM for the moment'

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,
                 eps, maxstep, seed, perform_selec=perform_selec,
                 dm=dm, max_patience=max_patience, use_silhouette=True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns=cols), out['Ez.y'],
                            assoc_thr=0.0,
                            title='Contribution of the variables to the latent dimensions',
                            storage_path=None)

    # Unpacking the model from the M1DGMM output
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]

    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli', 'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:, var_distrib == 'continuous'].astype(float).std(
        axis=0, keepdims=True)
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights
    # for the optimisation (self-association is zeroed, rows are normalised)
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = (assoc / assoc.sum(1, keepdims=True))

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk, :, 0], var_distrib, lambda_bin, nj_bin,
                          lambda_categ, nj_categ, lambda_ord, nj_ord,
                          lambda_cont, y_std)
                   for kk in range(k[0])]

    # Use only the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],
             'cont': ['continuous'], 'ord': 'ordinal'}

    # Gradient optimisation
    # Row-wise flag computed once: O(1) lookup per observation instead of a
    # linear membership scan of an index array on every iteration.
    has_nan = nan_mask.any(1)
    imputed_y = np.zeros_like(y)
    numobs = y.shape[0]

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected Binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        # Positions of the binomial variables among the bernoulli/binomial ones
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(lb_bin / nj_bin[binomial_indices]) \
            - lambda_bin[binomial_indices, 0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(ub_bin / nj_bin[binomial_indices]) \
            - lambda_bin[binomial_indices, 0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'], 0) / y_std[0] \
            - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'], 0) / y_std[0] \
            - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)

    zz = []
    fun = []
    for i in range(numobs):
        if not has_nan[i]:
            # Complete observation: nothing to impute
            imputed_y[i] = y[i]
            continue

        # Design the nan masks for the optimisation process
        nan_mask_i = nan_mask[i]
        weights_i = weights[nan_mask_i].mean(0)

        # Look for the best starting point: the cluster center whose
        # generated observation is closest on the observed variables
        cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],
                              cat_features[~nan_mask_i], weights_i)
                        for obs in cluster_obs]
        z02 = mu[np.argmin(cluster_dist), :, 0]

        # Formatting: per variable type, indices (within that type) of the
        # variables observed for this row
        vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0]
                  for type_alias, vartype in types.items()}

        complete_categ = [l for idx, l in enumerate(lambda_categ)
                          if idx in vars_i['categ']]
        complete_ord = [l for idx, l in enumerate(lambda_ord)
                        if idx in vars_i['ord']]

        opt = minimize(stat_all, z02,
                       args=(y[i, ~nan_mask_i], var_distrib[~nan_mask_i],
                             weights_i[~nan_mask_i],
                             lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],
                             complete_categ,
                             nj_categ[vars_i['categ']],
                             complete_ord,
                             nj_ord[vars_i['ord']],
                             lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                       tol=eps, method='trust-constr', jac=grad_stat,
                       constraints=lc, options={'maxiter': 1000})

        z = opt.x
        zz.append(z)
        fun.append(opt.fun)

        imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ,
                              nj_categ, lambda_ord, nj_ord, lambda_cont, y_std)

    # Only the masked entries are taken from the imputation; observed values
    # are kept as they were in y
    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return (out)