Example #1
def myentropy(nn_model, weightlist, xdata, returnallp=False):
    '''
    Usage: for NN_Dropout, pass the same weights duplicated N times;
    for MFVI, pass the sampled weights.
    '''

    #assert xdata.shape[0]==2
    n_samples = xdata.shape[1]
    p1narray = np.zeros((len(weightlist), n_samples)) #NWeightSamples x NPoints
   
    if not isinstance(nn_model, list):
        for i, w in enumerate(weightlist):
            w = np.reshape(w, (1, nn_model.D))
            p1narray[i, :] = nn_model.forward(w, xdata) # assumes 'model.forward' is dropout-like and generates a different output for each i
    else: # list of deterministic models, one per weight sample
        for i, nn in enumerate(nn_model):
            p1narray[i, :] = nn.forward(weightlist[i], xdata)
    certainpts = np.logical_or(np.all(p1narray==0, axis=0), np.all(p1narray==1, axis=0)) 

    p2narray = 1 - p1narray
    p1narraym = np.mean(p1narray, axis=0)
    p2narraym = np.mean(p2narray, axis=0)
    Hpredcheck = -p1narraym*np.log(p1narraym) - p2narraym*np.log(p2narraym)
    Hpredcheck[certainpts] = 0.0
    if returnallp:
        return p1narray, p1narraym, Hpredcheck
    else:
        return p1narraym, Hpredcheck
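A minimal usage sketch (not part of the original example), exercising the deterministic branch with a hypothetical ToyModel whose forward(w, x) returns one Bernoulli probability per input point:

import numpy as np

class ToyModel:
    # hypothetical stand-in for the deterministic 'nn' objects expected above
    def forward(self, w, xdata):
        return 1.0 / (1.0 + np.exp(-w @ xdata))  # one probability per column of xdata

xdata = np.random.randn(2, 5)                      # 2 features x 5 points
weights = [np.random.randn(2) for _ in range(10)]  # 10 weight samples
models = [ToyModel() for _ in range(10)]           # list input -> deterministic branch
p_mean, H_pred = myentropy(models, weights, xdata)
print(p_mean.shape, H_pred.shape)                  # (5,), (5,)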
Example #2
0
    def test_masking(self):
        masks = cg.get_masks(20, 3)
        self.assertTrue(np.max([np.sum(m) for m in masks]) <= 3)
        all_m = np.full(20, False)
        no_m = np.full(20, True)
        for m in masks:
            all_m = np.logical_or(all_m, m)
            no_m = np.logical_xor(no_m, m)
        self.assertTrue(np.all(all_m))
        self.assertFalse(np.any(no_m))
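The test pins down the contract of cg.get_masks(n, max_size): boolean masks over n indices, each selecting at most max_size of them, whose union covers every index and whose XOR-accumulation empties an all-True mask (each index is selected an odd number of times, here exactly once). A hypothetical implementation consistent with those assertions, chunking the indices into disjoint groups:

import numpy as np

def get_masks_sketch(n, max_size):
    # partition range(n) into disjoint boolean masks of at most max_size entries each
    masks = []
    for start in range(0, n, max_size):
        m = np.zeros(n, dtype=bool)
        m[start:start + max_size] = True
        masks.append(m)
    return masks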
Example #3
0
def find_AR(bayes_post, subj_post, prior, randomize = False, clip = [-1000, 1000]):
  bayes_post = np.clip(bayes_post, 0.00000001, 0.99999999)
  subj_post = np.clip(subj_post, 0.00000001, 0.99999999)
  if randomize:
    which_urn = np.random.binomial(1, 0.5, bayes_post.shape)
    bayes_post, subj_post, prior = (which_urn*[bayes_post, subj_post, prior] + 
                                    (1.0 - which_urn)*[1.0 - bayes_post, 1.0 - subj_post, 1.0 - prior])
    
  B_post_odds = np.log(bayes_post/(1.0 - bayes_post))
  S_post_odds = np.log(subj_post/(1.0 - subj_post))
  BLLR = B_post_odds - np.log(prior/(1.0 - prior))
  SLLR = S_post_odds - np.log(prior/(1.0 - prior))
  exclusion = BLLR == 0.0
  ARs = np.empty(BLLR.shape)
  ARs[exclusion] = 1.0
  ARs[~exclusion] = SLLR[~exclusion]/BLLR[~exclusion]
  clip_mask = np.logical_or(ARs < clip[0], ARs > clip[1])  # flag ARs falling outside the clip range
 
  return clip_mask, 1.0 - prior, ARs
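A small usage sketch with made-up posteriors and a flat prior (the log prior odds are then zero, so ARs reduces to the ratio of subjective to Bayesian posterior log odds):

import numpy as np

bayes_post = np.array([0.9, 0.6, 0.5, 0.2])
subj_post = np.array([0.8, 0.7, 0.5, 0.4])
prior = np.full(4, 0.5)

clip_mask, one_minus_prior, ARs = find_AR(bayes_post, subj_post, prior)
print(ARs)        # ratio of subjective to Bayesian log-likelihood ratios; 1.0 where BLLR == 0
print(clip_mask)  # True where the ratio falls outside the clip range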
Example #4
0
def compute_nj(y, var_distrib):
    ''' Compute nj for each variable y_j
    
    y (numobs x p pandas DataFrame): The original data
    var_distrib (p 1darray): The type of the variables in the data
    -------------------------------------------------------------------
    returns (tuple (p 1darray, nb_bin 1darray, nb_ord 1darray, nb_categ 1darray)): 
    The number of categories of all the variables, then for count/binary 
    variables only, for ordinal variables only and for categorical variables only
    '''

    nj = []
    nj_bin = []
    nj_ord = []
    nj_categ = []

    for i in range(len(y.columns)):
        if np.logical_or(var_distrib[i] == 'bernoulli',
                         var_distrib[i] == 'binomial'):
            max_nj = int(np.max(y.iloc[:, i], axis=0))
            nj.append(max_nj)
            nj_bin.append(max_nj)
        elif var_distrib[i] == 'ordinal':
            card_nj = len(np.unique(y.iloc[:, i]))
            nj.append(card_nj)
            nj_ord.append(card_nj)
        elif var_distrib[i] == 'categorical':
            card_nj = len(np.unique(y.iloc[:, i]))
            nj.append(card_nj)
            nj_categ.append(card_nj)
        elif var_distrib[i] == 'continuous':
            nj.append(np.inf)
        else:
            raise ValueError('Data type ' + str(var_distrib[i]) + ' is illegal')

    nj = np.array(nj)
    nj_bin = np.array(nj_bin)
    nj_ord = np.array(nj_ord)
    nj_categ = np.array(nj_categ)

    return nj, nj_bin, nj_ord, nj_categ
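A usage sketch with a tiny, made-up dataset; note that y is indexed with .columns and .iloc, so a pandas DataFrame (not a bare ndarray) is expected:

import numpy as np
import pandas as pd

y = pd.DataFrame({'smoker': [0, 1, 1, 0],            # bernoulli
                  'n_visits': [2, 0, 5, 3],          # binomial (count)
                  'severity': [1, 2, 3, 2],          # ordinal
                  'city': ['A', 'B', 'A', 'C'],      # categorical
                  'age': [34.0, 51.0, 28.0, 40.0]})  # continuous
var_distrib = np.array(['bernoulli', 'binomial', 'ordinal', 'categorical', 'continuous'])

nj, nj_bin, nj_ord, nj_categ = compute_nj(y, var_distrib)
print(nj)      # [ 1.  5.  3.  3. inf]
print(nj_bin)  # [1 5]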
Example #5
def fit_weights_and_save(weights_file, ca_data_file='rs_vm_denoise_200605.npy',
                         opto_silencing_data_file='vip_halo_data_for_sim.npy',
                         opto_activation_data_file='vip_chrimson_data_for_sim.npy',
                         constrain_wts=None, allow_var=True, fit_s02=True, constrain_isn=True,
                         tv=False, l2_penalty=0.01, init_noise=0.1, init_W_from_lsq=False,
                         scale_init_by=1, init_W_from_file=False, init_file=None,
                         correct_Eta=False, init_Eta_with_s02=False, init_Eta12_with_dYY=False,
                         use_opto_transforms=False):
    
    nsize,ncontrast = 6,6
    
    npfile = np.load(ca_data_file,allow_pickle=True)[()]#,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True)
    rs = npfile['rs']
    #rs_denoise = npfile['rs_denoise']
    
    nsize,ncontrast,ndir = 6,6,8
    #ori_dirs = [[0,4],[2,6]] #[[0,4],[1,3,5,7],[2,6]]
    ori_dirs = [[0,1,2,3,4,5,6,7]]
    nT = len(ori_dirs)
    nS = len(rs[0])
    
    def sum_to_1(r):
        R = r.reshape((r.shape[0],-1))
        #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        R = R/np.nansum(R,axis=1)[:,np.newaxis] # changed 8/28
        return R
    
    def norm_to_mean(r):
        R = r.reshape((r.shape[0],-1))
        R = R/np.nanmean(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        return R
    
    Rs = [[None,None] for i in range(len(rs))]
    Rso = [[[None for iT in range(nT)] for iS in range(nS)] for icelltype in range(len(rs))]
    rso = [[[None for iT in range(nT)] for iS in range(nS)] for icelltype in range(len(rs))]
    
    for iR,r in enumerate(rs):#rs_denoise):
        print(iR)
        for ialign in range(nS):
            #Rs[iR][ialign] = r[ialign][:,:nsize,:]
            #sm = np.nanmean(np.nansum(np.nansum(Rs[iR][ialign],1),1))
            #Rs[iR][ialign] = Rs[iR][ialign]/sm
            Rs[iR][ialign] = sum_to_1(r[ialign][:,:nsize,:])
    #         Rs[iR][ialign] = von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir)))
    
    kernel = np.ones((1,2,2))
    kernel = kernel/kernel.sum()
    
    for iR,r in enumerate(rs):
        for ialign in range(nS):
            for iori in range(nT):
                Rso[iR][ialign][iori] = np.nanmean(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]],-1)
                Rso[iR][ialign][iori][:,:,0] = np.nanmean(Rso[iR][ialign][iori][:,:,0],1)[:,np.newaxis] # average 0 contrast values
                Rso[iR][ialign][iori][:,1:,1:] = ssi.convolve(Rso[iR][ialign][iori],kernel,'valid')
                Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(Rso[iR][ialign][iori].shape[0],-1)
                #Rso[iR][ialign][iori] = Rso[iR][ialign][iori]/np.nanmean(Rso[iR][ialign][iori],-1)[:,np.newaxis]
    
    def set_bound(bd,code,val=0):
        # set bounds to 0 where 0s occur in 'code'
        for iitem in range(len(bd)):
            bd[iitem][code[iitem]] = val
    
    nN = 36
    nS = 2
    nP = 2
    nT = 1
    nQ = 4
    
    # code for bounds: 0 , constrained to 0
    # +/-1 , constrained to +/-1
    # 1.5, constrained to [0,1]
    # 2 , constrained to [0,inf)
    # -2 , constrained to (-inf,0]
    # 3 , unconstrained
    
    Wmx_bounds = 3*np.ones((nP,nQ),dtype=int)
    Wmx_bounds[0,1] = 0 # SSTs don't receive L4 input
    
    if allow_var:
        Wsx_bounds = 3*np.ones(Wmx_bounds.shape) #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
        Wsx_bounds[0,1] = 0
    else:
        Wsx_bounds = np.zeros(Wmx_bounds.shape) #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
    
    Wmy_bounds = 3*np.ones((nQ,nQ),dtype=int)
    Wmy_bounds[0,:] = 2 # PCs are excitatory
    Wmy_bounds[1:,:] = -2 # all the cell types except PCs are inhibitory
    Wmy_bounds[1,1] = 0 # SSTs don't inhibit themselves
    # Wmy_bounds[3,1] = 0 # PVs are allowed to inhibit SSTs, consistent with Hillel's unpublished results, but not consistent with Pfeffer et al.
    Wmy_bounds[2,0] = 0 # VIPs don't inhibit L2/3 PCs. According to Pfeffer et al., only L5 PCs were found to get VIP inhibition

    if allow_var:
        Wsy_bounds = 3*np.ones(Wmy_bounds.shape) #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)
        Wsy_bounds[1,1] = 0
        Wsy_bounds[3,1] = 0 
        Wsy_bounds[2,0] = 0
    else:
        Wsy_bounds = np.zeros(Wmy_bounds.shape) #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)

    if constrain_wts is not None:
        for wt in constrain_wts:
            Wmy_bounds[wt[0],wt[1]] = 0
            Wsy_bounds[wt[0],wt[1]] = 0
    
    def tile_nS_nT_nN(kernel):
        row = np.concatenate([kernel for idim in range(nS*nT)],axis=0)[np.newaxis,:]
        tiled = np.concatenate([row for irow in range(nN)],axis=0)
        return tiled
    
    if fit_s02:
        s02_bounds = 2*np.ones((nQ,)) # permitting noise as a free parameter
    else:
        s02_bounds = np.ones((nQ,))
    
    k_bounds = 1.5*np.ones((nQ*(nS-1),))
    
    kappa_bounds = np.ones((1,))
    # kappa_bounds = 2*np.ones((1,))
    
    T_bounds = 1.5*np.ones((nQ*(nT-1),))
    
    X_bounds = tile_nS_nT_nN(np.array([2,1]))
    # X_bounds = np.array([np.array([2,1,2,1])]*nN)
    
    Xp_bounds = tile_nS_nT_nN(np.array([3,1]))
    # Xp_bounds = np.array([np.array([3,1,3,1])]*nN)
    
    # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,)))
    # # Y_bounds = 2*np.ones((nN,nT*nS*nQ))
    
    Eta_bounds = tile_nS_nT_nN(3*np.ones((nQ,)))
    # Eta_bounds = 3*np.ones((nN,nT*nS*nQ))
    
    if allow_var:
        Xi_bounds = tile_nS_nT_nN(3*np.ones((nQ,)))
    else:
        Xi_bounds = tile_nS_nT_nN(np.zeros((nQ,)))

    # Xi_bounds = 3*np.ones((nN,nT*nS*nQ))
    
    h1_bounds = -2*np.ones((1,))
    
    h2_bounds = 2*np.ones((1,))
    
    
    # In[8]:
    
    
    # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)]
    shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ*(nS-1),),(1,),(nQ*(nT-1),),(nN,nT*nS*nP),(nN,nT*nS*nP),(nN,nT*nS*nQ),(nN,nT*nS*nQ),(1,),(1,),(nN,nT*nS*nQ),(nN,nT*nS*nQ)]
    print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))
    #         Wmx,    Wmy,    Wsx,    Wsy,    s02,  k,    kappa,T,   XX,            XXp,          Eta,          Xi, h1, h2, Eta1,   Eta2
    
    lb = [-np.inf*np.ones(shp) for shp in shapes]
    ub = [np.inf*np.ones(shp) for shp in shapes]
    bdlist = [Wmx_bounds,Wmy_bounds,Wsx_bounds,Wsy_bounds,s02_bounds,k_bounds,kappa_bounds,T_bounds,X_bounds,Xp_bounds,Eta_bounds,Xi_bounds,h1_bounds,h2_bounds,Eta_bounds,Eta_bounds]
    
    set_bound(lb,[bd==0 for bd in bdlist],val=0)
    set_bound(ub,[bd==0 for bd in bdlist],val=0)
    
    set_bound(lb,[bd==2 for bd in bdlist],val=0)
    
    set_bound(ub,[bd==-2 for bd in bdlist],val=0)
    
    set_bound(lb,[bd==1 for bd in bdlist],val=1)
    set_bound(ub,[bd==1 for bd in bdlist],val=1)
    
    set_bound(lb,[bd==1.5 for bd in bdlist],val=0)
    set_bound(ub,[bd==1.5 for bd in bdlist],val=1)
    
    set_bound(lb,[bd==-1 for bd in bdlist],val=-1)
    set_bound(ub,[bd==-1 for bd in bdlist],val=-1)
    
    # for bd in [lb,ub]:
    #     for ind in [2,3]:
    #         bd[ind][:,1] = 0
    
    # temporary for no variation expt.
    # lb[2] = np.zeros_like(lb[2])
    # lb[3] = np.zeros_like(lb[3])
    # lb[4] = np.ones_like(lb[4])
    # lb[5] = np.zeros_like(lb[5])
    # ub[2] = np.zeros_like(ub[2])
    # ub[3] = np.zeros_like(ub[3])
    # ub[4] = np.ones_like(ub[4])
    # ub[5] = np.ones_like(ub[5])
    # temporary for no variation expt.
    lb = np.concatenate([a.flatten() for a in lb])
    ub = np.concatenate([b.flatten() for b in ub])
    bounds = [(a,b) for a,b in zip(lb,ub)]
    
    
    # In[10]:
    
    
    nS = 2
    print('nT: '+str(nT))
    ndims = 5
    ncelltypes = 5
    Yhat = [[None for iT in range(nT)] for iS in range(nS)]
    Xhat = [[None for iT in range(nT)] for iS in range(nS)]
    Ypc_list = [[None for iT in range(nT)] for iS in range(nS)]
    Xpc_list = [[None for iT in range(nT)] for iS in range(nS)]
    mx = [None for iS in range(nS)]
    for iS in range(nS):
        mx[iS] = np.zeros((ncelltypes,))
        yy = [None for icelltype in range(ncelltypes)]
        for icelltype in range(ncelltypes):
            yy[icelltype] = np.nanmean(Rso[icelltype][iS][0],0)
            mx[iS][icelltype] = np.nanmax(yy[icelltype])
        for iT in range(nT):
            y = [np.nanmean(Rso[icelltype][iS][iT],axis=0)[:,np.newaxis]/mx[iS][icelltype] for icelltype in range(1,ncelltypes)]
            Ypc_list[iS][iT] = [None for icelltype in range(1,ncelltypes)]
            for icelltype in range(1,ncelltypes):
                rss = Rso[icelltype][iS][iT].copy()#/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1)
                #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1)
                rss = rss[np.isnan(rss).sum(1)==0]
        #         print(rss.max())
        #         rss[rss<0] = 0
        #         rss = rss[np.random.randn(rss.shape[0])>0]
                try:
                    u,s,v = np.linalg.svd(rss-np.mean(rss,0)[np.newaxis])
                    Ypc_list[iS][iT][icelltype-1] = [(s[idim],v[idim]) for idim in range(ndims)]
    #                 print('yep on Y')
    #                 print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
                except:
    #                 print('nope on Y')
                    print(np.mean(np.isnan(rss)))
                    print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
            Yhat[iS][iT] = np.concatenate(y,axis=1)
    #         x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis]
            icelltype = 0
            #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype]
            x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]/mx[iS][icelltype]
    #         opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis]
            Xhat[iS][iT] = np.concatenate((x,np.ones_like(x)),axis=1)
    #         Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1)
            icelltype = 0
            #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype]
            rss = Rso[icelltype][iS][iT].copy()
            rss = rss[np.isnan(rss).sum(1)==0]
    #         try:
            u,s,v = np.linalg.svd(rss-rss.mean(0)[np.newaxis])
            Xpc_list[iS][iT] = [None for iinput in range(2)]
            Xpc_list[iS][iT][0] = [(s[idim],v[idim]) for idim in range(ndims)]
            Xpc_list[iS][iT][1] = [(0,np.zeros((Xhat[0][0].shape[0],))) for idim in range(ndims)]
    #         except:
    #             print('nope on X')
    #             print(np.mean(np.isnan(rss)))
    #             print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1)))
    nN,nP = Xhat[0][0].shape
    print('nP: '+str(nP))
    nQ = Yhat[0][0].shape[1]
    
    
    # In[11]:
    
    
    def compute_f_(Eta,Xi,s02):
        return sim_utils.f_miller_troyer(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))
    def compute_fprime_m_(Eta,Xi,s02):
        return sim_utils.fprime_miller_troyer(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))*Xi
    def compute_fprime_s_(Eta,Xi,s02):
        s2 = Xi**2+np.concatenate((s02,s02),axis=0)
        return sim_utils.fprime_s_miller_troyer(Eta,s2)*(Xi/s2)
    def sorted_r_eigs(w):
        drW,prW = np.linalg.eig(w)
        srtinds = np.argsort(drW)
        return drW[srtinds],prW[:,srtinds]
    
    
    # In[12]:
    
    
    #         0.Wmx,  1.Wmy,  2.Wsx,  3.Wsy,  4.s02,5.K,  6.kappa,7.T,8.XX,        9.XXp,        10.Eta,       11.Xi,   12.h1,  13.h2,  14.Eta1,    15.Eta2
    
    shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ*(nS-1),),(1,),(nQ*(nT-1),),(nN,nT*nS*nP),(nN,nT*nS*nP),(nN,nT*nS*nQ),(nN,nT*nS*nQ),(1,),(1,),(nN,nT*nS*nQ),(nN,nT*nS*nQ)]
    print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))
    
    
    import calnet.fitting_spatial_feature
    import sim_utils

    YYhat = calnet.utils.flatten_nested_list_of_2d_arrays(Yhat)
    XXhat = calnet.utils.flatten_nested_list_of_2d_arrays(Xhat)
    
    opto_dict = np.load(opto_silencing_data_file,allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto,(nN,2,nS,2,nQ)),3).reshape((nN*2,-1))
    Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]
        
    YYhat_halo = Yhat_opto.reshape((nN,2,-1))
    opto_transform1 = calnet.utils.fit_opto_transform(YYhat_halo)

    opto_transform1.res[:,[0,2,3,4,6,7]] = 0

    dYY1 = opto_transform1.transform(YYhat) - YYhat
    #YYhat_halo_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_halo)
    #dYY1 = YYhat_halo_sim[:,1,:] - YYhat_halo_sim[:,0,:]

    def overwrite_plus_n(arr,to_overwrite,n):
        arr[:,to_overwrite] = arr[:,int(to_overwrite+n)]
        return arr

    for to_overwrite in [1,2]:
        n = 4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]
    for to_overwrite in [7]:
        n = -4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]
    #for to_overwrite in [1,2]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+4]
    #for to_overwrite in [7]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite-4]
    
    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]
    #for to_overwrite in [1,2,5,6]: # overwrite sst and vip with off-centered values
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+8]
    #for to_overwrite in [11,15]:
    #    dYY1[:,to_overwrite] = np.nan #dYY1[:,to_overwrite-8]


    opto_dict = np.load(opto_activation_data_file,allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto,(nN,2,nS,2,nQ)),3).reshape((nN*2,-1))
    Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_chrimson = Yhat_opto.reshape((nN,2,-1))
    opto_transform2 = calnet.utils.fit_opto_transform(YYhat_chrimson)
    dYY2 = opto_transform2.transform(YYhat) - YYhat
    #YYhat_chrimson_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_chrimson)
    #dYY2 = YYhat_chrimson_sim[:,1,:] - YYhat_chrimson_sim[:,0,:]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]
    
    print('dYY1 mean: %.3f'%np.nanmean(np.abs(dYY1)))
    print('dYY2 mean: %.3f'%np.nanmean(np.abs(dYY2)))

    dYY = np.concatenate((dYY1,dYY2),axis=0)
    
    titles = ['VIP silencing','VIP activation']
    for itype in [0,1,2,3]:
        plt.figure(figsize=(5,2.5))
        for iyy,dyy in enumerate([dYY1,dYY2]):
            plt.subplot(1,2,iyy+1)
            if np.sum(np.isnan(dyy[:,itype]))==0:
                sca.scatter_size_contrast(YYhat[:,itype],YYhat[:,itype]+dyy[:,itype],nsize=6,ncontrast=6)#,mn=0)
            plt.title(titles[iyy])
            plt.xlabel('cell type %d event rate, \n light off'%itype)
            plt.ylabel('cell type %d event rate, \n light on'%itype)
            ut.erase_top_right()
        plt.tight_layout()
        ut.mkdir('figures')
        plt.savefig('figures/scatter_light_on_light_off_target_celltype_%d.eps'%itype)
    
    opto_mask = ~np.isnan(dYY)
    #dYY[nN:][~opto_mask[nN:]] = -dYY[:nN][~opto_mask[nN:]]

    print('mean of opto_mask: '+str(opto_mask.mean()))
    
    #dYY[~opto_mask] = 0
    def zero_nans(arr):
        arr[np.isnan(arr)] = 0
        return arr
    #dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\
    #        opto_transform2.slope,opto_transform2.intercept,opto_transform2.res\
    #        = [zero_nans(x) for x in \
    #                [dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\
    #                opto_transform2.slope,opto_transform2.intercept,opto_transform2.res]]
    dYY = zero_nans(dYY)

    to_adjust = np.logical_or(np.isnan(opto_transform2.slope[0]),np.isnan(opto_transform2.intercept[0]))

    opto_transform2.slope[:,to_adjust] = 1/opto_transform1.slope[:,to_adjust]
    opto_transform2.intercept[:,to_adjust] = -opto_transform1.intercept[:,to_adjust]/opto_transform1.slope[:,to_adjust]
    opto_transform2.res[:,to_adjust] = -opto_transform1.res[:,to_adjust]/opto_transform1.slope[:,to_adjust]
    
    np.save('/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy',dYY)
    
    from importlib import reload
    reload(calnet)
    #reload(calnet.fitting_spatial_feature_opto_nonlinear)
    reload(sim_utils)
    # reload(calnet.fitting_spatial_feature)
    # W0list = [np.ones(shp) for shp in shapes]
    wt_dict = {}
    wt_dict['X'] = 1
    wt_dict['Y'] = 15
    wt_dict['Eta'] = 10 # 1 # 
    wt_dict['Xi'] = 0.1
    wt_dict['stims'] = np.ones((nN,1)) #(np.arange(30)/30)[:,np.newaxis]**1 #
    wt_dict['barrier'] = 0. #30.0 #0.1
    wt_dict['opto'] = 1e-1#1e1
    wt_dict['isn'] = 3
    wt_dict['tv'] = 1
    wt_dict['stimsOpto'] = 0.6*np.ones((nN,1))
    wt_dict['stimsOpto'][0::6] = 3
    wt_dict['celltypesOpto'] = 0.67*np.ones((1,nQ*nS*nT))
    wt_dict['celltypesOpto'][0,0::nQ] = 2
    wt_dict['dirOpto'] = np.array((1,0.5))
    wt_dict['dYY'] = 1#1000
    wt_dict['Eta12'] = 1
    wt_dict['coupling'] = 1

    np.save('XXYYhat.npy',{'YYhat':YYhat,'XXhat':XXhat,'rs':rs,'Rs':Rs,'Rso':Rso,'Ypc_list':Ypc_list,'Xpc_list':Xpc_list})
    Eta0 = invert_f_mt(YYhat)
    Eta10 = invert_f_mt(YYhat + dYY[:nN])
    Eta20 = invert_f_mt(YYhat + dYY[nN:])
    print('mean Eta1 diff: '+str(np.mean(np.abs(Eta0-Eta10))))
    print('mean Eta2 diff: '+str(np.mean(np.abs(Eta0-Eta20))))


    ntries = 1
    nhyper = 1
    dt = 1e-1
    niter = int(np.round(10/dt)) #int(1e4)
    perturbation_size = 5e-2
    # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5
    #l2_penalty = 0.1
    Wt = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    loss = np.zeros((nhyper,ntries))
    is_neg = np.array([b[1] for b in bounds])==0
    counter = 0
    negatize = [np.zeros(shp,dtype='bool') for shp in shapes]
    print(shapes)
    for ishp,shp in enumerate(shapes):
        nel = np.prod(shp)
        negatize[ishp][:][is_neg[counter:counter+nel].reshape(shp)] = True
        counter = counter + nel
    for ihyper in range(nhyper):
        for itry in range(ntries):
            print((ihyper,itry))
            W0list = [init_noise*(ihyper+1)*np.random.rand(*shp) for shp in shapes]
            print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))
            print('size of w0: '+str(np.sum([np.size(x) for x in W0list])))
            print('len(W0list) : '+str(len(W0list)))
            counter = 0
            for ishp,shp in enumerate(shapes):
                W0list[ishp][negatize[ishp]] = -W0list[ishp][negatize[ishp]]
            W0list[4] = np.ones(shapes[4]) # s02
            W0list[5] = np.ones(shapes[5]) # K
            W0list[6] = np.ones(shapes[6]) # kappa
            W0list[7] = np.ones(shapes[7]) # T
            W0list[8] = np.concatenate(Xhat,axis=1) #XX
            W0list[9] = np.zeros_like(W0list[8]) #XXp
            W0list[10] = Eta0.copy() #np.zeros(shapes[10]) #Eta
            W0list[11] = np.zeros(shapes[11]) #Xi
            W0list[14] = Eta10.copy() # Eta1
            W0list[15] = Eta20.copy() # Eta2
            #[Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi]
    #         W0list = Wstar_dict['as_list'].copy()
    #         W0list[1][1,0] = -1.5
    #         W0list[1][3,0] = -1.5
            if init_W_from_lsq:
                W0list[0],W0list[1] = initialize_W(Xhat,Yhat,scale_by=scale_init_by)
                for ivar in range(0,2):
                    W0list[ivar] = W0list[ivar] + init_noise*np.random.randn(*W0list[ivar].shape)
            if constrain_isn:
                W0list[1][0,0] = 3 
                W0list[1][0,3] = 5 
                W0list[1][3,0] = -5
                W0list[1][3,3] = -5

            #if constrain_isn:
            #    W0list[1][0,0] = 2
            #    W0list[1][0,3] = 2
            #    W0list[1][3,0] = -2
            #    W0list[1][3,3] = -2

            #if wt_dict['coupling'] > 0:
            #    W0list[1][1,0] = -1

            if init_W_from_file:
                npyfile = np.load(init_file,allow_pickle=True)[()]
                W0list = npyfile['as_list']
                if W0list[8].size == nN*nS*2*nP:
                    W0list[7] = np.array(())
                    W0list[1][1,0] = W0list[1][1,0]
                    W0list[8] = np.nanmean(W0list[8].reshape((nN,nS,2,nP)),2).flatten() #XX
                    W0list[9] = np.nanmean(W0list[9].reshape((nN,nS,2,nP)),2).flatten() #XXp
                    W0list[10] = np.nanmean(W0list[10].reshape((nN,nS,2,nQ)),2).flatten() #Eta
                    W0list[11] = np.nanmean(W0list[11].reshape((nN,nS,2,nQ)),2).flatten() #Xi
                if correct_Eta:
                    W0list[10] = Eta0.copy()
                if len(W0list) < len(shapes):
                    W0list = W0list[:-1] + [np.array(-0.5),np.array(1),Eta10.copy(),Eta20.copy()] # add h1,h2,Eta1,Eta2
                if init_Eta_with_s02:
                    s02 = W0list[4].copy()
                    Eta0 = invert_f_mt_with_s02(YYhat,s02,nS=nS,nT=nT)
                    Eta10 = invert_f_mt_with_s02(YYhat+dYY[:nN],s02,nS=nS,nT=nT)
                    Eta20 = invert_f_mt_with_s02(YYhat+dYY[nN:],s02,nS=nS,nT=nT)
                    W0list[10] = Eta0.copy()
                    W0list[14] = Eta10.copy()
                    W0list[15] = Eta20.copy()
                if init_Eta12_with_dYY:
                    Eta0 = W0list[10].copy().reshape((nN,nQ*nS*nT))
                    Xi0 = W0list[11].copy().reshape((nN,nQ*nS*nT))
                    s020 = W0list[4].copy()
                    YY0s = compute_f_(Eta0,Xi0,s020)
                    this_YY1 = opto_transform1.transform(YY0s)
                    this_YY2 = opto_transform2.transform(YY0s)
                    Eta10 = invert_f_mt_with_s02(this_YY1,s020,nS=nS,nT=nT)
                    Eta20 = invert_f_mt_with_s02(this_YY2,s020,nS=nS,nT=nT)
                    W0list[14] = Eta10.copy()
                    W0list[15] = Eta20.copy()

                    YY10s = compute_f_(Eta10,Xi0,s020)
                    YY20s = compute_f_(Eta20,Xi0,s020)
                    titles = ['VIP silencing','VIP activation']
                    for itype in [0,1,2,3]:
                        plt.figure(figsize=(5,2.5))
                        for iyy,yy in enumerate([YY10s,YY20s]):
                            plt.subplot(1,2,iyy+1)
                            if np.sum(np.isnan(yy[:,itype]))==0:
                                sca.scatter_size_contrast(YY0s[:,itype],yy[:,itype],nsize=6,ncontrast=6)#,mn=0)
                            plt.title(titles[iyy])
                            plt.xlabel('cell type %d event rate, \n light off'%itype)
                            plt.ylabel('cell type %d event rate, \n light on'%itype)
                            ut.erase_top_right()
                        plt.tight_layout()
                        ut.mkdir('figures')
                        plt.savefig('figures/scatter_light_on_light_off_init_celltype_%d.eps'%itype)
                #if wt_dict['coupling'] > 0:
                #    W0list[1][1,0] = W0list[1][1,0] - 1
                for ivar in [0,1,4,5]: # Wmx, Wmy, s02, k
                    W0list[ivar] = W0list[ivar] + init_noise*np.random.randn(*W0list[ivar].shape)

            # wt_dict['Xi'] = 10
            # wt_dict['Eta'] = 10
            print('size of bounds: '+str(np.sum([np.size(x) for x in bdlist])))
            print('size of w0: '+str(np.sum([np.size(x) for x in W0list])))
            print('size of shapes: '+str(np.sum([np.prod(shp) for shp in shapes])))
            Wt[ihyper][itry],loss[ihyper][itry],gr,hess,result = calnet.fitting_spatial_feature_opto_nonlinear.fit_W_sim(Xhat,Xpc_list,Yhat,Ypc_list,pop_rate_fn=sim_utils.f_miller_troyer,pop_deriv_fn=sim_utils.fprime_miller_troyer,neuron_rate_fn=sim_utils.evaluate_f_mt,W0list=W0list.copy(),bounds=bounds,niter=niter,wt_dict=wt_dict,l2_penalty=l2_penalty,compute_hessian=False,dt=dt,perturbation_size=perturbation_size,dYY=dYY,constrain_isn=constrain_isn,tv=tv,opto_mask=opto_mask,use_opto_transforms=use_opto_transforms,opto_transform1=opto_transform1,opto_transform2=opto_transform2)
    #         Wt[ihyper][itry] = [w[-1] for w in Wt_temp]
    #         loss[ihyper,itry] = loss_temp[-1]
    
    
    def parse_W(W):
        Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2 = W
        return Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2
    
    
    itry = 0
    Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2 = parse_W(Wt[0][0])
    
    labels = ['Wmx','Wmy','Wsx','Wsy','s02','K','kappa','T','XX','XXp','Eta','Xi','h1','h2','Eta1','Eta2']
    Wstar_dict = {}
    for i,label in enumerate(labels):
        Wstar_dict[label] = Wt[0][0][i]
    Wstar_dict['as_list'] = [Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,Eta1,Eta2]
    Wstar_dict['loss'] = loss[0][0]
    Wstar_dict['wt_dict'] = wt_dict
    np.save(weights_file,Wstar_dict,allow_pickle=True)
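A hypothetical call sketch only (the output file name is made up; the default .npy data files plus the calnet, sim_utils, sca and ut modules and helpers such as invert_f_mt and initialize_W must already be importable for this to run):

fit_weights_and_save('weights_example.npy',
                     ca_data_file='rs_vm_denoise_200605.npy',
                     opto_silencing_data_file='vip_halo_data_for_sim.npy',
                     opto_activation_data_file='vip_chrimson_data_for_sim.npy',
                     init_W_from_lsq=True, constrain_isn=True, use_opto_transforms=True)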
Example #6
def DDGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True):
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)
    
    y (numobs x p ndarray): The observations containing categorical variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    prev_lik = -1E16
    best_lik = -1E16
    tol = 0.01
    max_patience = 1
    patience = 0

    best_k = deepcopy(k)
    best_r = deepcopy(r)

    best_sil = -1
    new_sil = -1

    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(
        init['w_s']
    )  # Probability of path s' through the network for all s' in Omega

    numobs = len(y)
    likelihood = []
    it_num = 0
    ratio = 1000
    np.random.seed(seed)

    # Dispatch variables between categories
    y_bin = y[:,
              np.logical_or(var_distrib == 'bernoulli', var_distrib ==
                            'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',
                              var_distrib == 'binomial')].astype(int)
    nb_bin = len(nj_bin)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)

    y_ord = y[:, var_distrib == 'ordinal']
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)

    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])
    M = M_growth(1, r, numobs)

    assert nb_ord + nb_bin + nb_categ > 0

    # Compute the Gower matrix
    cat_features = np.logical_or(var_distrib == 'categorical',
                                 var_distrib == 'bernoulli')
    dm = gower_matrix(y, cat_features=cat_features)

    while (it_num < it) & ((ratio > eps) | (patience <= max_patience)):
        print(it_num)

        # The clustering layer is the one used to perform the clustering
        # i.e. the layer l such that k[l] == n_clusters
        clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################################
        ################################# S step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================

        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)
        '''
        print('mu_s',  np.abs(mu_s[0]).mean())
        print('sigma_s',  np.abs(sigma_s[0]).mean())
        print('z_s0',  np.abs(z_s[0]).mean())
        print('z_s1',  np.abs(z_s[1]).mean(0)[:,0])
        '''

        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================

        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)

        #=======================================================================
        # Compute the p(y| z1) for all variable categories
        #=======================================================================

        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,
                        lambda_categ, y_categ, nj_categ, z_s[0])

        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================

        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)

        #####################################################################################
        ################################# E step ############################################
        #####################################################################################

        #=====================================================================
        # Compute conditional probabilities used in the appendix of the AStA paper
        #=====================================================================

        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s,
                                          py_zl1)
        #del(py_zl1)

        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================

        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================

        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)

        ###########################################################################
        ############################ M step #######################################
        ###########################################################################

        #=======================================================
        # Compute MFA Parameters
        #=======================================================

        w_s = np.mean(ps_y, axis=0)
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y,
                                  H, k)

        #=======================================================
        # Identifiability conditions
        #=======================================================

        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

        del (Ez)

        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================

        # We optimize each column separately as it is faster than all column jointly
        # (and more relevant with the independence hypothesis)

        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################

        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        ratio = (new_lik - prev_lik) / abs(prev_lik)
        print(likelihood)

        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order='C').sum(idx_to_sum)

        temp_class = np.argmax(psl_y, axis=1)
        try:
            new_sil = silhouette_score(dm, temp_class, metric='precomputed')
        except ValueError:
            new_sil = -1

        print('Silhouette score:', new_sil)
        if best_sil < new_sil:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)

            fig = plt.figure(figsize=(8, 8))
            plt.scatter(z[:, 0], z[:, 1])
            plt.show()

        # Refresh the classes only if they provide a better explanation of the data
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)

        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1

        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################

        is_not_min_specif = not (np.all(np.array(k) == n_clusters)
                                 & np.array_equal(r, [2, 1]))

        if look_for_simpler_network(
                it_num) & perform_selec & is_not_min_specif:
            r_to_keep = r_select(y_bin, y_ord, y_categ, zl1_ys, z2_z1s, w_s)

            # If r_l == 0, delete the last l + 1 layers
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1

            k_to_keep = k_select(w_s, k, new_L, clustering_layer)

            is_L_unchanged = L == new_L
            is_r_unchanged = np.all(
                [len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all(
                [len(k_to_keep[l]) == k[l] for l in range(new_L)])

            is_selection = not (is_r_unchanged & is_k_unchanged
                                & is_L_unchanged)

            assert new_L > 0

            if is_selection:

                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]

                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]

                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]

                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0],
                                                    np.array(r_to_keep[0]) + 1
                                                    ])
                    lambda_bin = lambda_bin[:, bin_r_to_keep]

                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    lambda_ord_intercept = [
                        lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord
                    ]
                    Lambda_ord_var = np.stack(
                        [lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])\
                                  for j in range(nb_ord)]

                if nb_categ > 0:
                    lambda_categ_intercept = [
                        lambda_categ[j][:, 0] for j in range(nb_categ)
                    ]
                    Lambda_categ_var = [
                        lambda_categ_j[:, -r[0]:]
                        for lambda_categ_j in lambda_categ
                    ]
                    Lambda_categ_var = [
                        lambda_categ_j[:, r_to_keep[0]]
                        for lambda_categ_j in lambda_categ
                    ]

                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])\
                                   for j in range(nb_categ)]

                w = w_s.reshape(*k, order='C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])

                # If layer deletion, sum the last components of the paths
                if L > new_L:
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(
                        order='C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order='C')

                w_s /= w_s.sum()

                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]

                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])
                L = new_L

                patience = 0
                best_r = deepcopy(r)
                best_k = deepcopy(k)

                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S', S)
            print("w_s", len(w_s))

        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1

    out = dict(likelihood = likelihood, classes = classes, z = z, \
               best_r = best_r, best_k = best_k)
    return out
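The column-dispatch pattern used above (splitting the columns of y by var_distrib with np.logical_or) in isolation, on made-up data:

import numpy as np

y = np.array([[0, 2, 1],
              [1, 0, 3],
              [1, 5, 2]])
var_distrib = np.array(['bernoulli', 'binomial', 'ordinal'])

bin_mask = np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')
y_bin = y[:, bin_mask]                   # columns 0 and 1
y_ord = y[:, var_distrib == 'ordinal']   # column 2
print(y_bin.shape, y_ord.shape)          # (3, 2) (3, 1)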
Example #7
def MDGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True): 
    
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)
    
    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int or str): The number of clusters to look for in the data, or the usage mode of the MDGMM ('auto' or 'multi')
    r (dict): The dimension of latent variables through the first 2 layers
    k (dict): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
                    For categorical data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''
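    # Note on the dict arguments, based on how they are used below: r and k carry
    # 'c' (continuous head), 'd' (discrete head) and 't' (common tail) entries,
    # e.g. r = {'c': [...], 'd': [...], 't': [...]}; n_clusters may be an int,
    # 'auto' or 'multi'.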
    
    # Break the reference link 
    k = deepcopy(k)
    r = deepcopy(r)
    
    best_k = deepcopy(k)
    best_r = deepcopy(r)

    # Add other checks for the other variables
    check_inputs(k, r)

    prev_lik = - 1E15
    best_lik = -1E15
    
    tol = 0.01
    max_patience = 1
    patience = 0
    
    #====================================================
    # Initialize the parameters
    #====================================================
        
    eta_c, eta_d, H_c, H_d, psi_c, psi_d = dispatch_dgmm_init(init)
    lambda_bin, lambda_ord, lambda_categ = dispatch_gllvm_init(init)
    w_s_c, w_s_d = dispatch_paths_init(init)
    
    numobs = len(y)
    likelihood = []
    it_num = 0
    ratio = 1000
    np.random.seed(seed)

    #====================================================        
    # Dispatch variables between categories
    #====================================================

    y_bin = y[:, np.logical_or(var_distrib == 'bernoulli',\
                               var_distrib == 'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',\
                              var_distrib == 'binomial')]
        
    nj_bin = nj_bin.astype(int)
    nb_bin = len(nj_bin)
        
    y_ord = y[:, var_distrib == 'ordinal']    
    nj_ord = nj[var_distrib == 'ordinal']
    nj_ord = nj_ord.astype(int)
    nb_ord = len(nj_ord)
    
    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)    
    
    yc = y[:, var_distrib == 'continuous'] 
    
    ss = StandardScaler()
    yc = ss.fit_transform(yc)

    nb_cont = yc.shape[1]
    
    # *_1L stands for quantities going through the whole network (head + tail)
    k_1L, L_1L, L, bar_L, S_1L = nb_comps_and_layers(k)    
    r_1L = {'c': r['c'] + r['t'], 'd': r['d'] + r['t'], 't': r['t']}
    
    best_sil = [-1.1 for l in range(L['t'] - 1)] if n_clusters == 'multi' else -1.1 
    new_sil = [-1.1 for l in range(L['t'] - 1)] if n_clusters == 'multi' else -1.1 
    
    
    M = M_growth(1, r_1L, numobs) 

    if nb_bin + nb_ord + nb_categ == 0: # Create the InputError class and change this
        raise ValueError('Input does not contain discrete variables, '
                         'consider using a regular DGMM')
    if nb_cont == 0: # Create the InputError class and change this
        raise ValueError('Input does not contain continuous values, '
                         'consider using a DDGMM')
                         
                         
    # Compute the Gower matrix
    cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli')
    dm = gower_matrix(y, cat_features = cat_features)
                     
    while (it_num < it) & ((ratio > eps) | (patience <= max_patience)):
        print(it_num)

        # The clustering layer is the one used to perform the clustering 
        # i.e. the layer l such that k[l] == n_clusters
        if not(isnumeric(n_clusters)):
            if n_clusters == 'auto':
                clustering_layer = 0
            elif n_clusters == 'multi':
                clustering_layer = list(range(L['t'] - 1))
            else:
                raise ValueError('Please enter an int, auto or multi for n_clusters')
        else:
            assert (np.array(k['t']) == n_clusters).any()
            clustering_layer = np.argmax(np.array(k['t']) == n_clusters)

        #####################################################################################
        ################################# MC step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for both heads and tail
        #=====================================================================  
        
        mu_s_c, sigma_s_c = compute_path_params(eta_c, H_c, psi_c)
        sigma_s_c = ensure_psd(sigma_s_c)
        
        mu_s_d, sigma_s_d = compute_path_params(eta_d, H_d, psi_d)
        sigma_s_d = ensure_psd(sigma_s_d)
                        
        z_s_c, zc_s_c, z_s_d, zc_s_d = draw_z_s_all_network(mu_s_c, sigma_s_c,\
                            mu_s_d, sigma_s_d, yc, eta_c, eta_d, S_1L, L, M)
                    
        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================
        
        # Create wrapper as before and after
        chsi_c = compute_chsi(H_c, psi_c, mu_s_c, sigma_s_c)
        chsi_c = ensure_psd(chsi_c)
        rho_c = compute_rho(eta_c, H_c, psi_c, mu_s_c, sigma_s_c, zc_s_c, chsi_c)
        
                
        chsi_d = compute_chsi(H_d, psi_d, mu_s_d, sigma_s_d)
        chsi_d = ensure_psd(chsi_d)
        rho_d = compute_rho(eta_d, H_d, psi_d, mu_s_d, sigma_s_d, zc_s_d, chsi_d)


        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s_c, z2_z1s_d = draw_z2_z1s_network(chsi_c, chsi_d, rho_c, \
                                                 rho_d, M, r_1L, L)
        
        #=======================================================================
        # Compute the p(y^D| z1) for all discrete variables
        #=======================================================================
        
        py_zl1_d = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,\
                          lambda_categ, y_categ, nj_categ, z_s_d[0])
        
        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================
                
        zl1_ys_d = draw_zl1_ys(z_s_d, py_zl1_d, M['d'])
                
        #####################################################################################
        ################################# E step ############################################
        #####################################################################################
        
        #=====================================================================
        # Compute quantities necessary for E steps of both heads and tail
        #=====================================================================
        
        # Discrete head quantities
        pzl1_ys_d, ps_y_d, py_d = E_step_GLLVM(z_s_d[0], mu_s_d[0], sigma_s_d[0], w_s_d, py_zl1_d)        
        py_s_d = ps_y_d * py_d / w_s_d[n_axis]
        
        # Continuous head quantities
        ps_y_c, py_s_c, py_c = continuous_lik(yc, mu_s_c[0], sigma_s_c[0], w_s_c)
        
        pz_s_d = fz_s(z_s_d, mu_s_d, sigma_s_d) 
        pz_s_c = fz_s(z_s_c, mu_s_c, sigma_s_c) 
        
        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================
        
        # pz2_z1s_d and pz2_z1s_c are also computed over the tail indices, even though those are not needed
        
        pz2_z1s_d = fz2_z1s(t(pzl1_ys_d, (1, 0, 2)), z2_z1s_d, chsi_d, rho_d, S_1L['d'])
        pz_ys_d = fz_ys(t(pzl1_ys_d, (1, 0, 2)), pz2_z1s_d)
          
        pz2_z1s_c = fz2_z1s([], z2_z1s_c, chsi_c, rho_c, S_1L['c'])
        pz_ys_c = fz_ys([], pz2_z1s_c)
        
        pz2_z1s_t = fz2_z1s([], z2_z1s_c[bar_L['c']:], chsi_c[bar_L['c']:], \
                            rho_c[bar_L['c']:], S_1L['t'])

        # Junction layer computations
        # Compute p(zC |s)
        py_zs_d = fy_zs(pz_ys_d, py_s_d) 
        py_zs_c = fy_zs(pz_ys_c, py_s_c)
         
        # Compute p(zt | yC, yD, sC, SD)        
        pzt_yCyDs = fz_yCyDs(py_zs_c, pz_ys_d, py_s_c, M, S_1L, L)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================
        
        # Discrete head. 
        Ez_ys_d, E_z1z2T_ys_d, E_z2z2T_ys_d, EeeT_ys_d = \
            E_step_DGMM_d(zl1_ys_d, H_d, z_s_d, zc_s_d, z2_z1s_d, pz_ys_d,\
                        pz2_z1s_d, S_1L['d'], L['d'])
        
            
        # Continuous head
        Ez_ys_c, E_z1z2T_ys_c, E_z2z2T_ys_c, EeeT_ys_c = \
            E_step_DGMM_c(H_c, z_s_c, zc_s_c, z2_z1s_c, pz_ys_c,\
                          pz2_z1s_c, S_1L['c'], L['c'])


        # Junction layers
        Ez_ys_t, E_z1z2T_ys_t, E_z2z2T_ys_t, EeeT_ys_t = \
            E_step_DGMM_t(H_c[bar_L['c']:], \
            z_s_c[bar_L['c']:], zc_s_c[bar_L['c']:], z2_z1s_c[bar_L['c']:],\
                pzt_yCyDs, pz2_z1s_t, S_1L, L, k_1L)  
        
        # Error here for the first two terms: p(y^h | z^t, s^C) != p(y^h | z^t, s^{1C:L})
        pst_yCyD = fst_yCyD(py_zs_c, py_zs_d, pz_s_d, w_s_c, w_s_d, k_1L, L)   
                               
        ###########################################################################
        ############################ M step #######################################
        ###########################################################################

        #=======================================================
        # Compute DGMM Parameters 
        #=======================================================
            
        # Discrete head
        w_s_d = np.mean(ps_y_d, axis = 0)      
        eta_d_barL, H_d_barL, psi_d_barL = M_step_DGMM(Ez_ys_d, E_z1z2T_ys_d, E_z2z2T_ys_d, \
                                        EeeT_ys_d, ps_y_d, H_d, k_1L['d'][:-1],\
                                            L_1L['d'], r_1L['d'])
         
        # Add dispatching function here
        eta_d[:bar_L['d']] = eta_d_barL
        H_d[:bar_L['d']] = H_d_barL
        psi_d[:bar_L['d']] = psi_d_barL
                
        # Continuous head
        w_s_c = np.mean(ps_y_c, axis = 0)  
        eta_c_barL, H_c_barL, psi_c_barL = M_step_DGMM(Ez_ys_c, E_z1z2T_ys_c, E_z2z2T_ys_c, \
                                        EeeT_ys_c, ps_y_c, H_c, k_1L['c'][:-1],\
                                            L_1L['c'] + 1, r_1L['c'])
        
        eta_c[:bar_L['c']] = eta_c_barL
        H_c[:bar_L['c']] = H_c_barL
        psi_c[:bar_L['c']] = psi_c_barL
                    

        # Common tail
        eta_t, H_t, psi_t, Ezst_y = M_step_DGMM_t(Ez_ys_t, E_z1z2T_ys_t, E_z2z2T_ys_t, \
                                        EeeT_ys_t, ps_y_c, ps_y_d, pst_yCyD, \
                                            H_c[bar_L['c']:], S_1L, k_1L, \
                                            L_1L, L, r_1L['t'])  
            
        eta_d[bar_L['d']:] = eta_t
        H_d[bar_L['d']:] = H_t
        psi_d[bar_L['d']:] = psi_t            

        eta_c[bar_L['c']:] = eta_t
        H_c[bar_L['c']:] = H_t
        psi_c[bar_L['c']:] = psi_t  
                         
        #=======================================================
        # Identifiability conditions
        #=======================================================
        w_s_t = np.mean(pst_yCyD, axis = 0)  
        eta_d, H_d, psi_d, AT_d, eta_c, H_c, psi_c, AT_c = network_identifiability(eta_d, \
                                H_d, psi_d, eta_c, H_c, psi_c, w_s_c, w_s_d, w_s_t, bar_L)
                
        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================
        
        # We optimize each column separately as it is faster than optimizing all
        # columns jointly (and more consistent with the independence hypothesis)
                
        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y_d, \
                    pzl1_ys_d, z_s_d[0], AT_d[0], tol = tol, maxstep = maxstep)
                 
        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y_d, \
                    pzl1_ys_d, z_s_d[0], AT_d[0], tol = tol, maxstep = maxstep)
            
        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y_d,\
                    pzl1_ys_d, z_s_d[0], AT_d[0], tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################
          
        new_lik = np.sum(np.log(py_d) + np.log(py_c))
        likelihood.append(new_lik)
        ratio = (new_lik - prev_lik)/abs(prev_lik)
        
        
        if n_clusters == 'multi':
            temp_classes = [] 
            z_tail = []
            classes = [[] for l in range(L['t'] - 1)]
            
            for l in clustering_layer:
                idx_to_sum = tuple(set(range(1, L['t'] + 1)) -\
                                   set([clustering_layer[l] + 1]))
                psl_y = pst_yCyD.reshape(numobs, *k['t'],\
                                         order = 'C').sum(idx_to_sum)
                
                temp_class_l = np.argmax(psl_y, axis = 1)
                sil_l = silhouette_score(dm, temp_class_l, metric = 'precomputed')
                    
                temp_classes.append(temp_class_l)
                #z_tail.append(Ezst_y[l].sum(1))
                new_sil[l] = sil_l
            
            #z_tail = []
            for l in range(L['t'] - 1):
                zl = Ezst_y[l].sum(1)
                z_tail.append(zl)
                    
                if best_sil[l] < new_sil[l]:
                    # Update the quantity if the silhouette score is better 
                    best_sil[l] = deepcopy(new_sil[l])
                    classes[l] = deepcopy(temp_classes[l])
                    
                    if zl.shape[-1] == 3:
                        plot_3d(zl, classes[l])
                    elif zl.shape[-1] == 2:
                        plot_2d(zl, classes[l])
           
        else: 
            idx_to_sum = tuple(set(range(1, L['t'] + 1)) - set([clustering_layer + 1]))
            psl_y = pst_yCyD.reshape(numobs, *k['t'], order = 'C').sum(idx_to_sum) 
        
            temp_classes = np.argmax(psl_y, axis = 1) 
            try:
                new_sil = silhouette_score(dm, temp_classes, metric = 'precomputed') 
            except ValueError:
                new_sil = -1
            
            z_tail = [Ezst_y[l].sum(1) for l in range(L['t'] - 1)]
                             
            if best_sil < new_sil:
                # Update the quantity if the silhouette score is better 
                zl = z_tail[clustering_layer]
                best_sil = deepcopy(new_sil)
                classes = deepcopy(temp_classes)
                
                if zl.shape[-1] == 3:
                    plot_3d(zl, classes)
                elif zl.shape[-1] == 2:
                    plot_2d(zl, classes)
        
        # Keep track of the best likelihood reached so far
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)
      
        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 1, r_1L, numobs)
        else:
            patience += 1
                       
        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################
                    
        min_nb_clusters = 2
        is_not_min_specif = not(is_min_architecture_reached(k, r, min_nb_clusters))
        
        if look_for_simpler_network(it_num) & perform_selec & is_not_min_specif:
            
            # To add: selection according to categ
            r_to_keep = r_select(y_bin, y_ord, y_categ, yc, zl1_ys_d,\
                                 z2_z1s_d[:bar_L['d']], w_s_d, z2_z1s_c[:bar_L['c']],
                                 z2_z1s_c[bar_L['c']:], n_clusters)
            
            # Check layer deletion
            is_c_layer_deletion = np.any([len(rl) == 0 for rl in r_to_keep['c']]) 
            is_d_layer_deletion = np.any([len(rl) == 0 for rl in r_to_keep['d']]) 
            is_head_layer_deletion = np.any([is_c_layer_deletion, is_d_layer_deletion])
            
            if is_head_layer_deletion:
                # Restart the algorithm
                if is_c_layer_deletion:
                    r['c'] = [len(rl) for rl in r_to_keep['c'][:-1]]
                    k['c'] = k['c'][:-1]
                if is_d_layer_deletion:
                    r['d'] = [len(rl) for rl in r_to_keep['d'][:-1]]
                    k['d'] = k['d'][:-1]   
                    
                init = dim_reduce_init(pd.DataFrame(y), n_clusters, k, r, nj, var_distrib,\
                                       seed = None)
                
                eta_c, eta_d, H_c, H_d, psi_c, psi_d = dispatch_dgmm_init(init)
                lambda_bin, lambda_ord, lambda_categ = dispatch_gllvm_init(init)
                w_s_c, w_s_d = dispatch_paths_init(init)
                  
                # *_1L stands for quantities that go through the whole network (head + tail)
                k_1L, L_1L, L, bar_L, S_1L = nb_comps_and_layers(k)    
                r_1L = {'c': r['c'] + r['t'], 'd': r['d'] + r['t'], 't': r['t']}
                        
                M = M_growth(it_num + 1, r_1L, numobs) 
                
                prev_lik = deepcopy(new_lik)
                it_num = it_num + 1
                print(likelihood)
                
                print('Restarting the algorithm')
                continue
            
            # If r_l == 0, delete the last l + 1 layers
            new_Lt = np.sum([len(rl) != 0 for rl in r_to_keep['t']]) #- 1
            
            #w_s_t = pst_yCyD.mean(0)
            k_to_keep = k_select(w_s_c, w_s_d, w_s_t, k, new_Lt, clustering_layer, n_clusters)
                        
            is_selection = check_if_selection(r_to_keep, r, k_to_keep, k, L, new_Lt)
            
            assert new_Lt > 0 # > 1 ?
            if n_clusters == 'multi':
                assert new_Lt == L['t']
            
            if is_selection:
                
                # Part to change when update also number of layers on each head 
                nb_deleted_layers_tail = L['t'] - new_Lt
                L['t'] = new_Lt
                L_1L = {keys: values - nb_deleted_layers_tail for keys, values in L_1L.items()}
                
                eta_c, eta_d, H_c, H_d, psi_c, psi_d = dgmm_coeff_selection(eta_c,\
                            H_c, psi_c, eta_d, H_d, psi_d, L, r_to_keep, k_to_keep)
                    
                lambda_bin, lambda_ord, lambda_categ = gllvm_coeff_selection(lambda_bin, lambda_ord,\
                                                               lambda_categ, r, r_to_keep)
                
                w_s_c, w_s_d = path_proba_selection(w_s_c, w_s_d, k, k_to_keep, new_Lt)
                
                k = {h: [len(k_to_keep[h][l]) for l in range(L[h])] for h in ['d', 't']}
                k['c'] = [len(k_to_keep['c'][l]) for l in range(L['c'] + 1)]
                
                r = {h: [len(r_to_keep[h][l]) for l in range(L[h])] for h in ['d', 't']}
                r['c'] = [len(r_to_keep['c'][l]) for l in range(L['c'] + 1)]
                
                k_1L, _, L, bar_L, S_1L = nb_comps_and_layers(k)    
                r_1L = {'c': r['c'] + r['t'], 'd': r['d'] + r['t'], 't': r['t']}
            
                patience = 0
                best_r = deepcopy(r)
                best_k = deepcopy(k)  
                
                #=======================================================
                # Identifiability conditions
                #======================================================= 
                eta_d, H_d, psi_d, AT_d, eta_c, H_c, psi_c, AT_c = network_identifiability(eta_d, \
                                H_d, psi_d, eta_c, H_c, psi_c, w_s_c, w_s_d, w_s_t, bar_L)
                    
            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S_1L', S_1L)
            print("w_s_c", len(w_s_c))
            print("w_s_d", len(w_s_d))
        
        M = M_growth(it_num + 1, r_1L, numobs)
        
        prev_lik = deepcopy(new_lik)
        print(likelihood)
        print('Silhouette score:', new_sil)  
        
        it_num = it_num + 1

    out = dict(likelihood = likelihood, classes = classes, \
                   best_r = best_r, best_k = best_k)
    if n_clusters == 'multi':
        out['z'] = z_tail
    else:
        out['z'] = z_tail[clustering_layer]
    return(out)
Example #8
0
def closeleq(x, y):
    return np.logical_or(np.isclose(x, y), x <= y)
Example #9
0
def closegeq(x, y):
    return np.logical_or(np.isclose(x, y), x >= y)
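# A minimal, self-contained check of the two tolerant comparators above
# (illustrative only; the expected outputs follow from np.isclose's default
# tolerances):
import numpy as np

x_demo = np.array([1.0, 1.0 + 1e-9, 2.0])
y_demo = np.array([1.0, 1.0, 1.0])
print(closeleq(x_demo, y_demo))  # [ True  True False]
print(closegeq(x_demo, y_demo))  # [ True  True  True]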
Example #10
0
def dim_reduce_init(y, n_clusters, k, r, nj, var_distrib, seed=None):
    ''' Perform dimension reduction into a continuous r dimensional space and determine 
    the init coefficients in that space
    
    y (numobs x p ndarray): The data 
    k (dict of lists): The number of components of each layer of the network
    r (dict of lists): The dimensions of the components of each layer of the network
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
                    For categorical data: the number of different existing categories for each variable
    var_distrib (p 1darray): An array containing the types of the variables in y 
    seed (None): The random state seed to use for the dimension reduction
    ---------------------------------------------------------------------------------------
    returns (dict): All initialisation parameters
    '''

    if type(y) != pd.core.frame.DataFrame:
        raise TypeError('y should be a dataframe for prince')

    numobs = len(y)

    # Length of both heads and tail. L, bar_L and S might not be homogeneous
    # with the MDGMM notations
    bar_L = {'c': len(k['c']), 'd': len(k['d'])}
    L = {'c': len(k['c']), 'd': len(k['d']), 't': len(k['t']) - 1}

    # Paths of both heads and tail
    S = {'c': np.prod(k['c']), 'd': np.prod(k['d']), 't': np.prod(k['t'])}

    # Data of both heads
    yc = y.iloc[:, var_distrib == 'continuous'].values
    yd = y.iloc[:, var_distrib != 'continuous'].values

    #==============================================================
    # Dimension reduction performed with MCA on discrete data
    #==============================================================

    # Check input = False to remove
    mca = prince.MCA(n_components = r['d'][0], n_iter=3, copy=True,\
                     check_input=False, engine='auto', random_state = seed)
    z1D = mca.fit_transform(yd.astype(str)).values

    y = y.values

    # Be careful: the first z^c is the continuous data, whereas the first
    # z^d is the MCA-transformed data.

    #==============================================================
    # Set the shape parameters of each discrete data type
    #==============================================================

    y_bin = y[:,
              np.logical_or(var_distrib == 'bernoulli', var_distrib ==
                            'binomial')]
    y_bin = y_bin.astype(int)
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',
                              var_distrib == 'binomial')]
    nb_bin = len(nj_bin)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical']
    nb_categ = len(nj_categ)

    y_ord = y[:, var_distrib == 'ordinal']
    y_ord = y_ord.astype(int)
    nj_ord = nj[var_distrib == 'ordinal']
    nb_ord = len(nj_ord)

    ss = StandardScaler()
    yc = ss.fit_transform(yc)

    #=======================================================
    # Determining the Gaussian Parameters
    #=======================================================
    init = {}

    # Initialise both heads quantities
    eta_d, H_d, psi_d, zd, paths_pred_d = init_head(z1D, k['d'], r['d'],
                                                    numobs, L['d'])
    eta_c, H_c, psi_c, zc, paths_pred_c = init_head(yc, k['c'], r['c'], numobs,
                                                    L['c'])

    # Initialisation of the common layer. The coefficients are those between the last
    # Layer of both heads and the first junction layer
    eta_h_last, H_h_last, psi_h_last, paths_pred_h_last, zt_first = init_junction_layer(
        r, k, zc, zd)
    eta_d.append(eta_h_last['d'])
    H_d.append(H_h_last['d'])
    psi_d.append(psi_h_last['d'])

    eta_c.append(eta_h_last['c'])
    H_c.append(H_h_last['c'])
    psi_c.append(psi_h_last['c'])

    paths_pred_d.append(paths_pred_h_last['d'])
    paths_pred_c.append(paths_pred_h_last['c'])
    zt = [zt_first]

    # Initialisation of the following common layers
    for l in range(L['t']):
        params = get_MFA_params(zt[l], k['t'][l], r['t'][l:])
        eta_c.append(params['eta'][..., n_axis])
        eta_d.append(params['eta'][..., n_axis])

        H_c.append(params['H'])
        H_d.append(params['H'])

        psi_c.append(params['psi'])
        psi_d.append(params['psi'])

        zt.append(params['z_nextl'])
        zc.append(params['z_nextl'])
        zd.append(params['z_nextl'])

        paths_pred_c.append(params['classes'])
        paths_pred_d.append(params['classes'])

    paths_pred_c = np.stack(paths_pred_c).T
    paths_c, nb_paths_c = np.unique(paths_pred_c, return_counts=True, axis=0)
    paths_c, nb_paths_c = add_missing_paths(k['c'] + k['t'][:-1], paths_c,
                                            nb_paths_c)

    paths_pred_d = np.stack(paths_pred_d).T
    paths_d, nb_paths_d = np.unique(paths_pred_d, return_counts=True, axis=0)
    paths_d, nb_paths_d = add_missing_paths(k['d'] + k['t'][:-1], paths_d,
                                            nb_paths_d)

    w_s_c = nb_paths_c / numobs
    w_s_c = np.where(w_s_c == 0, 1E-16, w_s_c)

    w_s_d = nb_paths_d / numobs
    w_s_d = np.where(w_s_d == 0, 1E-16, w_s_d)

    k_dt = np.concatenate([k['d'] + k['t']])
    w_s_t = w_s_d.reshape(*k_dt, order='C').sum(tuple(range(L['d'])))
    w_s_t = w_s_t.reshape(-1, order='C')

    # Check that all paths have been explored
    if (len(paths_c) != S['c'] * S['t']) | (len(paths_d) != S['d'] * S['t']):
        raise RuntimeError('Path initialisation failed')

    #=============================================================
    # Enforcing identifiability constraints over the first layer
    #=============================================================

    eta_d, H_d, psi_d, AT_d, eta_c, H_c, psi_c, AT_c = network_identifiability(eta_d, \
                    H_d, psi_d, eta_c, H_c, psi_c, w_s_c, w_s_d, w_s_t, bar_L)

    init['c'] = {}
    init['c']['eta'] = eta_c
    init['c']['H'] = H_c
    init['c']['psi'] = psi_c
    init['c']['w_s'] = w_s_c  # Probabilities of each path through the network
    init['c']['z'] = zc

    init['d'] = {}
    init['d']['eta'] = eta_d
    init['d']['H'] = H_d
    init['d']['psi'] = psi_d
    init['d']['w_s'] = w_s_d  # Probabilities of each path through the network
    init['d']['z'] = zd

    # The clustering layer is the one used to perform the clustering
    # i.e. the layer l such that k[l] == n_clusters
    if not (isnumeric(n_clusters)):
        if n_clusters == 'auto':
            #n_clusters = k['t'][0]
            # First tail layer is the default clustering layer in auto mode
            clustering_layer = L['c']

        elif n_clusters == 'multi':
            clustering_layer = range(L['t'])

        else:
            raise ValueError(
                'Please enter an int, auto or multi for n_clusters')
    else:
        kc_complete = k['c'] + k['t'][:-1]
        common_clus_layer_idx = (np.array(kc_complete) == n_clusters)
        common_clus_layer_idx[:L['c']] = False
        clustering_layer = np.argmax(common_clus_layer_idx)

        assert clustering_layer >= L['c']
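        # Worked example (illustrative numbers): with k = {'c': [2, 3], 't': [4, 2, 1]}
        # and n_clusters = 4, kc_complete = [2, 3, 4, 2]; the first L['c'] = 2 head
        # positions are masked out, so clustering_layer = 2, i.e. the first tail
        # layer, whose number of components matches n_clusters.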

    init['classes'] = paths_pred_c[:, clustering_layer]

    #=======================================================
    # Determining the coefficients of the GLLVM layer
    #=======================================================

    # Determining lambda_bin coefficients.
    lambda_bin = np.zeros((nb_bin, r['d'][0] + 1))

    for j in range(nb_bin):
        Nj = int(np.max(
            y_bin[:, j]))  # The support of the jth binomial is [1, Nj]

        if Nj == 1:  # If the variable is Bernoulli not binomial
            yj = y_bin[:, j]
            z_new = zd[0]
        else:  # If not, need to convert Binomial output to Bernoulli output
            yj, z_new = bin_to_bern(Nj, y_bin[:, j], zd[0])

        lr = LogisticRegression()

        if j < r['d'][0] - 1:
            lr.fit(z_new[:, :j + 1], yj)
            lambda_bin[j, :j + 2] = np.concatenate(
                [lr.intercept_, lr.coef_[0]])
        else:
            lr.fit(z_new, yj)
            lambda_bin[j] = np.concatenate([lr.intercept_, lr.coef_[0]])

    ## Identifiability of bin coefficients
    lambda_bin[:, 1:] = lambda_bin[:, 1:] @ AT_d[0][0]

    # Determining lambda_ord coefficients
    lambda_ord = []

    for j in range(nb_ord):
        #Nj = len(np.unique(y_ord[:,j], axis = 0))  # The support of the jth ordinal is [1, Nj]
        yj = y_ord[:, j]

        ol = OrderedLogit()
        ol.fit(zd[0], yj)

        ## Identifiability of ordinal coefficients
        beta_j = (ol.beta_.reshape(1, r['d'][0]) @ AT_d[0][0]).flatten()
        lambda_ord_j = np.concatenate([ol.alpha_, beta_j])
        lambda_ord.append(lambda_ord_j)

    # Determining lambda_categ coefficients
    lambda_categ = []

    for j in range(nb_categ):
        yj = y_categ[:, j]

        lr = LogisticRegression(multi_class='multinomial')
        lr.fit(zd[0], yj)

        ## Identifiability of categ coefficients
        beta_j = lr.coef_ @ AT_d[0][0]
        lambda_categ.append(np.hstack([lr.intercept_[..., n_axis], beta_j]))

    init['lambda_bin'] = lambda_bin
    init['lambda_ord'] = lambda_ord
    init['lambda_categ'] = lambda_categ

    return init
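# Hedged usage sketch (illustrative only): `y`, `nj` and `var_distrib` are
# assumed to come from the data preparation step, and k/r are architecture
# dicts with 'c', 'd' and 't' entries whose values depend on the dataset.
# init = dim_reduce_init(y, n_clusters = 'auto', k = k, r = r, nj = nj,
#                        var_distrib = var_distrib, seed = None)
# init['c']['w_s'], init['d']['w_s']   # initial path probabilities of each head
# init['classes']                      # initial partition on the clustering layer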
                        'ordinal', 'categorical', 'categorical', 'categorical',\
                        'categorical', 'bernoulli', 'ordinal', 'ordinal',\
                        'continuous', 'categorical', 'bernoulli'])

# Plotting utilities
varnames = np.array(['age', 'workclass', 'fnlwgt',\
            'education.num', 'marital.status', 'occupation', 'relationship',\
            'race', 'sex', 'capital.gain', 'capital.loss',\
            'hours.per.week', 'native.country', 'income'])

p = len(varnames)

dtypes_dict = {'continuous': float, 'categorical': str, 'ordinal': float,\
              'bernoulli': str, 'binomial': int}

cat_features = np.logical_or(var_distrib == 'categorical',
                             var_distrib == 'bernoulli')

#=====================================
# Select the design
#=====================================

design = 'Absent'
filenum = 1
sub_design = 'trivarié'
prefix = design[:3] + '_'
nb_files_per_design = 10
nb_pobs = 200
sub_aliases = {'bivarié': 'bivariate', 'trivarié': 'trivariate'}

#=====================================
# Import the train and test sets
Example #12
0
    lfp_tmp /= 100.  # Scaling
    # remove evoked LFP
    lfp_tmp = lfp_tmp - np.mean(lfp_tmp, 2, keepdims=True)
    lfp[probe] = lfp_tmp

    # Desired CSD prediction locations
    z[probe] = np.stack([24. * np.ones(len(csd_loc[probe])), csd_loc[probe]]).T

# %% Visualize data, check for outlier trials
ol_bool = {}
for probe in ['probeC', 'probeD']:
    trial_sd = np.std(lfp[probe], axis=2, keepdims=True)
    ol = np.any(np.abs(lfp[probe]) > 5 * trial_sd, axis=(0, 1))
    ol_bool[probe] = ol

ol = np.logical_or(ol_bool['probeC'], ol_bool['probeD'])
print('outlier trials: %d' % np.sum(ol))
if plot_ol:
    for probe in ['probeC', 'probeD']:
        x1 = np.unique(x[probe][:, 0])
        for j in x1:
            plt.figure(figsize=(6, 16))
            for i, xi in enumerate(x[probe][x[probe][:, 0] == j]):
                plt.plot(t,
                         xi[1] + 3 * lfp[probe][i, :, np.logical_not(ol)].T,
                         'k')
                plt.plot(t, xi[1] + 3 * lfp[probe][i, :, ol].T, 'r')
            plt.title('%s x1 = %0.2f microns' % (probe, j))

for probe in ['probeC', 'probeD']:
    lfp[probe] = lfp[probe][:, :, np.logical_not(ol)]
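# Self-contained illustration of the trial-rejection rule used above (shapes are
# illustrative, channels x time x trials; the injected artefact exceeds five
# times the cross-trial standard deviation at its location):
import numpy as np

rng = np.random.default_rng(0)
lfp_demo = rng.normal(size=(4, 100, 60))
lfp_demo[2, 50, 3] = 30.0                      # corrupt one sample of trial 3

trial_sd_demo = np.std(lfp_demo, axis=2, keepdims=True)
ol_demo = np.any(np.abs(lfp_demo) > 5 * trial_sd_demo, axis=(0, 1))
print(np.where(ol_demo)[0])                    # trial 3 is flagged
lfp_demo_clean = lfp_demo[:, :, np.logical_not(ol_demo)]
print(lfp_demo_clean.shape)                    # (4, 100, 59)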
Example #13
0
def dim_reduce_init(y,
                    n_clusters,
                    k,
                    r,
                    nj,
                    var_distrib,
                    use_famd=False,
                    seed=None):
    ''' Perform dimension reduction into a continuous r dimensional space and determine 
    the init coefficients in that space
    
    y (numobs x p ndarray): The observations containing categorical variables
    n_clusters (int): The number of clusters to look for in the data
    k (1d array): The number of components of the latent Gaussian mixture layers
    r (list): The dimension of latent variables at each layer
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    var_distrib (p 1darray): An array containing the types of the variables in y 
    use_famd (Bool): Whether to use the FAMD method (True) or not (False) to initialise the 
                    first continuous latent variable. Otherwise MCA is used.
    seed (None): The random state seed to use for the dimension reduction
    ---------------------------------------------------------------------------------------
    returns (dict): All initialisation parameters
    '''

    L = len(k)
    numobs = len(y)
    S = np.prod(k)

    #==============================================================
    # Dimension reduction performed with MCA
    #==============================================================

    if type(y) != pd.core.frame.DataFrame:
        raise TypeError('y should be a dataframe for prince')

    if (np.array(var_distrib) == 'ordinal').all():
        print('PCA init')

        pca = prince.PCA(n_components = r[0], n_iter=3, rescale_with_mean=True,\
            rescale_with_std=True, copy=True, check_input=True, engine='auto',\
                random_state = seed)
        z1 = pca.fit_transform(y).values

    elif use_famd:
        famd = prince.FAMD(n_components = r[0], n_iter=3, copy=True, check_input=False, \
                               engine='auto', random_state = seed)
        z1 = famd.fit_transform(y).values

    else:
        # Check input = False to remove
        mca = prince.MCA(n_components = r[0], n_iter=3, copy=True,\
                         check_input=False, engine='auto', random_state = seed)
        z1 = mca.fit_transform(y).values

    z = [z1]
    y = y.values

    #==============================================================
    # Set the shape parameters of each data type
    #==============================================================

    y_bin = y[:, np.logical_or(var_distrib == 'bernoulli',\
                               var_distrib == 'binomial')].astype(int)
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',\
                              var_distrib == 'binomial')]
    nb_bin = len(nj_bin)

    y_ord = y[:, var_distrib == 'ordinal'].astype(float).astype(int)
    nj_ord = nj[var_distrib == 'ordinal']
    nb_ord = len(nj_ord)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical']
    nb_categ = len(nj_categ)

    # Scale the continuous variables to unit standard deviation
    y_cont = y[:, var_distrib == 'continuous']

    # Before was np.float
    y_cont = y_cont / np.std(y_cont.astype(float), axis=0, keepdims=True)
    nb_cont = y_cont.shape[1]

    #=======================================================
    # Determining the Gaussian Parameters
    #=======================================================
    init = {}

    eta = []
    H = []
    psi = []
    paths_pred = np.zeros((numobs, L))

    for l in range(L):
        params = get_MFA_params(z[l], k[l], r[l:])
        eta.append(params['eta'][..., n_axis])
        H.append(params['H'])
        psi.append(params['psi'])
        z.append(params['z_nextl'])
        paths_pred[:, l] = params['classes']

    paths, nb_paths = np.unique(paths_pred, return_counts=True, axis=0)
    paths, nb_paths = add_missing_paths(k, paths, nb_paths)

    w_s = nb_paths / numobs
    w_s = np.where(w_s == 0, 1E-16, w_s)

    # Check all paths have been explored
    if len(paths) != S:
        raise RuntimeError('Real path len is', S, 'while the initial number', \
                           'of path was only',  len(paths))

    w_s = w_s.reshape(*k).flatten('C')

    #=============================================================
    # Enforcing identifiability constraints over the first layer
    #=============================================================

    H = diagonal_cond(H, psi)
    Ez, AT = compute_z_moments(w_s, eta, H, psi)
    eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

    init['eta'] = eta
    init['H'] = H
    init['psi'] = psi

    init['w_s'] = w_s  # Probabilities of each path through the network
    init['z'] = z

    # The clustering layer is the one used to perform the clustering
    # i.e. the layer l such that k[l] == n_clusters
    clustering_layer = np.argmax(np.array(k) == n_clusters)

    init['classes'] = paths_pred[:, clustering_layer]  # 0 To change with clustering_layer_idx

    #=======================================================
    # Determining the coefficients of the GLLVM layer
    #=======================================================

    # Determining lambda_bin coefficients.

    lambda_bin = np.zeros((nb_bin, r[0] + 1))

    for j in range(nb_bin):
        Nj = np.max(y_bin[:, j])  # The support of the jth binomial is [1, Nj]

        if Nj == 1:  # If the variable is Bernoulli not binomial
            yj = y_bin[:, j]
            z_new = z[0]
        else:  # If not, need to convert Binomial output to Bernoulli output
            yj, z_new = bin_to_bern(Nj, y_bin[:, j], z[0])

        lr = LogisticRegression()

        if j < r[0] - 1:
            lr.fit(z_new[:, :j + 1], yj)
            lambda_bin[j, :j + 2] = np.concatenate(
                [lr.intercept_, lr.coef_[0]])
        else:
            lr.fit(z_new, yj)
            lambda_bin[j] = np.concatenate([lr.intercept_, lr.coef_[0]])

    ## Identifiability of bin coefficients
    lambda_bin[:, 1:] = lambda_bin[:, 1:] @ AT[0][0]

    # Determining lambda_ord coefficients
    lambda_ord = []

    for j in range(nb_ord):
        Nj = len(np.unique(
            y_ord[:, j], axis=0))  # The support of the jth ordinal is [1, Nj]
        yj = y_ord[:, j]

        ol = OrderedLogit()
        ol.fit(z[0], yj)

        ## Identifiability of ordinal coefficients
        beta_j = (ol.beta_.reshape(1, r[0]) @ AT[0][0]).flatten()
        lambda_ord_j = np.concatenate([ol.alpha_, beta_j])
        lambda_ord.append(lambda_ord_j)

    # Determining the coefficients of the continuous variables
    lambda_cont = np.zeros((nb_cont, r[0] + 1))

    for j in range(nb_cont):
        yj = y_cont[:, j]
        linr = LinearRegression()

        if j < r[0] - 1:
            linr.fit(z[0][:, :j + 1], yj)
            lambda_cont[j, :j + 2] = np.concatenate([[linr.intercept_],
                                                     linr.coef_])
        else:
            linr.fit(z[0], yj)
            lambda_cont[j] = np.concatenate([[linr.intercept_], linr.coef_])

    ## Identifiability of continuous coefficients
    lambda_cont[:, 1:] = lambda_cont[:, 1:] @ AT[0][0]

    # Determining lambda_categ coefficients
    lambda_categ = []

    for j in range(nb_categ):
        yj = y_categ[:, j]

        lr = LogisticRegression(multi_class='multinomial')
        lr.fit(z[0], yj)

        ## Identifiability of categ coefficients
        beta_j = lr.coef_ @ AT[0][0]
        lambda_categ.append(np.hstack([lr.intercept_[..., n_axis], beta_j]))

    init['lambda_bin'] = lambda_bin
    init['lambda_ord'] = lambda_ord
    init['lambda_cont'] = lambda_cont
    init['lambda_categ'] = lambda_categ

    return init
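# Hedged usage sketch (illustrative only): here k and r are flat lists, and `y`,
# `nj`, `var_distrib` are assumed to be prepared beforehand (e.g. with compute_nj
# as in the data-preparation example further down).
# k = [n_clusters]
# r = [2, 1]
# init = dim_reduce_init(y, n_clusters, k, r, nj, var_distrib,
#                        use_famd = True, seed = None)
# init['classes']   # initial partition on the layer such that k[l] == n_clusters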
Example #14
0
def M1DGMM(y, n_clusters, r, k, init, var_distrib, nj, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
              dm =  [], max_patience = 1, use_silhouette = True):# dm small hack to remove 
    
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)
    
    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    use_silhouette (Bool): If True use the silhouette as quality criterion (best for clustering) else use
                            the likelihood (best for data augmentation).
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    prev_lik = - 1E16
    best_lik = -1E16
    
    best_sil = -1 
    new_sil = -1 
        
    tol = 0.01
    patience = 0
    is_looking_for_better_arch = False
    
    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_cont = deepcopy(init['lambda_cont'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(init['w_s']) # Probability of path s' through the network for all s' in Omega
   
    numobs = len(y)
    likelihood = []
    silhouette = []
    it_num = 0
    ratio = 1000
    np.random.seed(seed)
    out = {} # Store the full output
        
    # Dispatch variables between categories
    y_bin = y[:, np.logical_or(var_distrib == 'bernoulli',var_distrib == 'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli',var_distrib == 'binomial')].astype(int)
    nb_bin = len(nj_bin)
        
    y_ord = y[:, var_distrib == 'ordinal']    
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)
    
    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)    
    
    y_cont = y[:, var_distrib == 'continuous'].astype(float)
    nb_cont = y_cont.shape[1]
    
    # Scale the continuous variables to unit standard deviation
    y_cont = y_cont / y_cont.std(axis = 0, keepdims = True)
    
    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])    
    M = M_growth(1, r, numobs)
   
    assert nb_bin + nb_ord + nb_cont + nb_categ > 0 
    if nb_bin + nb_ord + nb_cont + nb_categ != len(var_distrib):
        raise ValueError('Some variable types were not understood,\
                         existing types are: continuous, categorical,\
                         ordinal, binomial and bernoulli')

    # Compute the Gower matrix
    if len(dm) == 0:
        cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli')
        dm = gower_matrix(y, cat_features = cat_features)
    
               
    # Keep iterating while iterations remain, the likelihood improvement exceeds eps
    # and the patience is not exhausted; or if a new architecture was searched for
    # in the previous iteration
    while ((it_num < it) & (ratio > eps) & (patience <= max_patience)) | is_looking_for_better_arch:
        print(it_num)

        # The clustering layer is the one used to perform the clustering 
        # i.e. the layer l such that k[l] == n_clusters
        
        if not(isnumeric(n_clusters)):
            if n_clusters == 'auto':
                clustering_layer = 0
            else:
                raise ValueError('Please enter an int or "auto" for n_clusters')
        else:
            assert (np.array(k) == n_clusters).any()
            clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################################
        ################################# S step ############################################
        #####################################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================  
        
        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)
         
        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================
        
        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)
                   
        #=======================================================================
        # Compute the p(y| z1) for all variable categories
        #=======================================================================
        
        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord, \
                        lambda_categ, y_categ, nj_categ, y_cont, lambda_cont, z_s[0])
        
        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================
                
        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)
                
        #####################################################################################
        ################################# E step ############################################
        #####################################################################################
        
        #=====================================================================
        # Compute conditional probabilities used in the appendix of asta paper
        #=====================================================================
        
        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s, py_zl1)

        #=====================================================================
        # Compute p(z^{(l)}| s, y). Equation (5) of the paper
        #=====================================================================
        
        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)
                
        
        #=====================================================================
        # Compute MFA expectations
        #=====================================================================
        
        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)


        ###########################################################################
        ############################ M step #######################################
        ###########################################################################
             
        #=======================================================
        # Compute MFA Parameters 
        #=======================================================

        w_s = np.mean(ps_y, axis = 0)      
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H, k)

        #=======================================================
        # Identifiability conditions
        #======================================================= 

        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
        
        del(Ez)
        
        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================
                        
        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)
                 
        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)
            
        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        lambda_cont = cont_params_GLLVM(y_cont, lambda_cont, ps_y, pzl1_ys, z_s[0], AT[0],\
                     tol = tol, maxstep = maxstep)

        ###########################################################################
        ################## Clustering parameters updating #########################
        ###########################################################################
          
        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        silhouette.append(new_sil)
        ratio = abs((new_lik - prev_lik)/prev_lik)
        
        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum) 

        temp_class = np.argmax(psl_y, axis = 1)
        try:
            new_sil = silhouette_score(dm, temp_class, metric = 'precomputed')
        except ValueError:
            new_sil = -1
           
        # Store the params according to the silhouette or likelihood
        is_better = (best_sil < new_sil) if use_silhouette else (best_lik < new_lik)
            
        if is_better:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)
            '''
            plt.figure(figsize=(8,8))
            plt.scatter(z[:, 0], z[:, 1], c = classes)
            plt.show()
            '''
            
            # Store the output
            out['classes'] = deepcopy(classes)
            out['best_z'] = deepcopy(z_s[0])
            out['Ez.y'] = z
            out['best_k'] = deepcopy(k)
            out['best_r'] = deepcopy(r)
            
            out['best_w_s'] = deepcopy(w_s)
            out['lambda_bin'] = deepcopy(lambda_bin)
            out['lambda_ord'] = deepcopy(lambda_ord)
            out['lambda_categ'] = deepcopy(lambda_categ)
            out['lambda_cont'] = deepcopy(lambda_cont)

            out['eta'] = deepcopy(eta)            
            out['mu'] = deepcopy(mu_s)
            out['sigma'] = deepcopy(sigma_s)
            
            out['psl_y'] = deepcopy(psl_y)
            out['ps_y'] = deepcopy(ps_y)

            
        # Keep track of the best likelihood reached so far
        if best_lik < new_lik:
            best_lik = deepcopy(new_lik)
                               
        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1
                          
        ###########################################################################
        ######################## Parameter selection  #############################
        ###########################################################################
        min_nb_clusters = 2
       
        if isnumeric(n_clusters): # To change when add multi mode
            is_not_min_specif = not(np.all(np.array(k) == n_clusters) & np.array_equal(r, [2,1]))
        else:
            is_not_min_specif = not(np.all(np.array(k) == min_nb_clusters) & np.array_equal(r, [2,1]))
        
        is_looking_for_better_arch = look_for_simpler_network(it_num) & perform_selec & is_not_min_specif
        if is_looking_for_better_arch:
            r_to_keep = r_select(y_bin, y_ord, y_categ, y_cont, zl1_ys, z2_z1s, w_s)
            
            # If r_l == 0, delete the last l + 1 layers
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1 
            
            k_to_keep = k_select(w_s, k, new_L, clustering_layer, not(isnumeric(n_clusters)))
    
            is_L_unchanged = (L == new_L)
            is_r_unchanged = np.all([len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all([len(k_to_keep[l]) == k[l] for l in range(new_L)])
              
            is_selection = not(is_r_unchanged & is_k_unchanged & is_L_unchanged)
            
            assert new_L > 0
            
            if is_selection:           
                
                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]
                
                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]
                
                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]
                
                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1]) 
                    lambda_bin = lambda_bin[:, bin_r_to_keep]
                 
                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    lambda_ord_intercept = [lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord]
                    Lambda_ord_var = np.stack([lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])\
                                  for j in range(nb_ord)]
    
                # To recheck
                if nb_cont > 0:
                    # Add the intercept:
                    cont_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1]) 
                    lambda_cont = lambda_cont[:, cont_r_to_keep]  
                    
                if nb_categ > 0:
                    lambda_categ_intercept = [lambda_categ[j][:, 0]  for j in range(nb_categ)]
                    Lambda_categ_var = [lambda_categ_j[:,-r[0]:] for lambda_categ_j in lambda_categ]
                    Lambda_categ_var = [var_j[:, r_to_keep[0]] for var_j in Lambda_categ_var]

                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])\
                                   for j in range(nb_categ)]  

                w = w_s.reshape(*k, order = 'C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])
                
                # If layer deletion, sum the last components of the paths
                if L > new_L: 
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(order = 'C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order = 'C')
    
                w_s /= w_s.sum()
                
                
                # Refresh the classes: TO RECHECK
                #idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
                #ps_y_tmp = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum)
                #np.argmax(ps_y_tmp[:, k_to_keep[0]], axis = 1)

    
                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]
                
                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])    
                L = new_L

                patience = 0
                
                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
        
                del(Ez)
                                                
                         
            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S',S)
            print("w_s", len(w_s))
            
        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1
        print(likelihood)
        print(silhouette)
        

    out['likelihood'] = likelihood
    out['silhouette'] = silhouette
    
    return(out)
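# Hedged end-to-end sketch (illustrative only): `y` is the prepared mixed-type
# DataFrame, `y_np` its numpy version and `dm` the Gower distance matrix, as in
# the data-preparation example below; the architecture values are arbitrary.
# k = [n_clusters]
# r = [2, 1]
# init = dim_reduce_init(y, n_clusters, k, r, nj, var_distrib, seed = None)
# out = M1DGMM(y_np, n_clusters, r, k, init, var_distrib, nj,
#              it = 30, eps = 1E-05, perform_selec = True, dm = dm)
# out['classes'], out['Ez.y']   # cluster labels and continuous representation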
Example #15
0
le = LabelEncoder()
for col_idx, colname in enumerate(y.columns):
    if var_distrib[col_idx] == 'bernoulli':
        y[colname] = le.fit_transform(y[colname])

enc = OneHotEncoder(sparse=False, drop='first')
labels_oh = enc.fit_transform(np.array(labels).reshape(-1, 1)).flatten()

nj, nj_bin, nj_ord, nj_categ = compute_nj(y, var_distrib)
y_np = y.values
nb_cont = np.sum(var_distrib == 'continuous')

p_new = y.shape[1]

# Feature category (cf)
cf_non_enc = np.logical_or(vd_categ_non_enc == 'categorical',
                           vd_categ_non_enc == 'bernoulli')

# Non encoded version of the dataset:
y_nenc_typed = y_categ_non_enc.astype(object)
y_np_nenc = y_nenc_typed.values

# Defining distances over the non encoded features
dm = gower_matrix(y_nenc_typed, cat_features=cf_non_enc)

dtype = {y.columns[j]: np.float64 if (var_distrib[j] != 'bernoulli') and \
        (var_distrib[j] != 'categorical') else str for j in range(p_new)}

y = y.astype(dtype, copy=True)

#===========================================#
# Running the algorithm
Example #16
0
def stat_all(z, target, var_distrib, weights, lambda_bin, nj_bin, lambda_categ, nj_categ,\
             lambda_ord, nj_ord, lambda_cont, y_std):

    # Prevent the shape changes caused by the scipy minimize function
    if len(z.shape) == 1: z = z[n_axis]

    #=================================
    # Binary and count variables
    #=================================

    is_count = np.logical_or(var_distrib == 'binomial',
                             var_distrib == 'bernoulli')
    count_weights = weights[is_count]

    count = stat_bin(lambda_bin, z, nj_bin)
    norm = np.where(target[is_count] > 0, target[is_count], 1)
    count_dist = ((count - target[is_count]) / norm)**2
    count_dist = np.sum(count_dist * count_weights)

    #=================================
    # Continuous variables
    #=================================

    cont_weights = weights[var_distrib == 'continuous']

    cont = stat_cont(lambda_cont, z)
    mean_cont = cont * y_std
    norm = np.where(target[var_distrib == 'continuous'] > 0,\
                     target[var_distrib == 'continuous'], 1)
    cont_dist = ((mean_cont - target[var_distrib == 'continuous'])\
                        / norm) ** 2
    cont_dist = np.sum(cont_dist * cont_weights)

    #=================================
    # Categorical variables
    #=================================

    categ_weights = weights[var_distrib == 'categorical']

    nb_categ = len(nj_categ)
    categ = stat_categ(lambda_categ, z, nj_categ)

    categ_dist = []
    for j in range(nb_categ):
        true_idx = int(target[var_distrib == 'categorical'][j])
        categ_dist.append((1 - categ[j][true_idx])**2)

    categ_dist = np.sum(categ_dist * categ_weights)

    #=================================
    # Ordinal variables
    #=================================

    ord_weights = weights[var_distrib == 'ordinal']

    nb_ord = len(nj_ord)
    ord_ = stat_ord(lambda_ord, z, nj_ord)

    ord_dist = []
    for j in range(nb_ord):
        true_idx = int(target[var_distrib == 'ordinal'][j])
        ord_dist.append((1 - (ord_[j][true_idx + 1] - ord_[j][true_idx]))**2)

    ord_dist = np.sum(ord_dist * ord_weights)

    return count_dist + categ_dist + ord_dist + cont_dist
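# Hedged usage sketch (illustrative only): stat_all is shaped to serve as an
# objective for scipy.optimize.minimize, which is why the 1d-to-2d reshape guard
# sits at the top of the function. The lambda_* coefficients, the target profile
# and the weights are assumed to come from a previously fitted model.
# from scipy.optimize import minimize
# z0 = np.zeros(lambda_bin.shape[1] - 1)   # latent dimension r[0]
# res = minimize(stat_all, z0,
#                args = (target, var_distrib, weights, lambda_bin, nj_bin,
#                        lambda_categ, nj_categ, lambda_ord, nj_ord,
#                        lambda_cont, y_std))
# res.x   # latent point whose decoded statistics best match the target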
Example #17
0
def cross_validation(odom_1,
                     aligned_1,
                     odom_2,
                     aligned_2,
                     type_1,
                     type_2,
                     K=10):
    """Function to run cross-validation to run nonlinear optimization for optimal
    pose estimation and evaluation.  Performs cross-validation K times and splits
    the dataset into K (approximately) even splits, to be used for in-sample
    training and out-of-sample evaluation.

    This function estimates a relative transformation between two lidar frames
    using nonlinear optimization, and evaluates the robustness of this estimate
    through K-fold cross-validation performance of our framework.  Though this
    function does not return any values, it saves all results in the
    'results' relative path.

    Parameters:
        odom_1 (pd.DataFrame):  DataFrame corresponding to odometry data for the
            pose we wish to transform into the odom_2 frame of reference.  See
            data/main_odometry.csv for an example of the headers/columns/data
            types this function expects this DataFrame to have.

        aligned_1 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.  This
            data corresponds to the odom_1 sensor frame.

        odom_2 (pd.DataFrame):  DataFrame corresponding to odometry data for the
            pose we wish to transform the odom_1 frame of reference into.  See
            data/main_odometry.csv for an example of the headers/columns/data
            types this function expects this DataFrame to have.

        aligned_2 (pd.DataFrame): DataFrame corresponding to aligned odometry
            data given the 3 sets of odometry data for the 3 lidar sensors.  This
            data corresponds to the odom_2 sensor frame.

        type_1 (str):  String denoting the lidar type.  Should be in the set
            {'main', 'front', 'rear'}.  This type corresponds to the data type
            for the odom_1 frame.

        type_2 (str):  String denoting the lidar type.  Should be in the set
            {'main', 'front', 'rear'}.  This type corresponds to the data type
            for the odom_2 frame.

        K (int):  The number of folds to be used for cross-validation.  Defaults
            to 10.
    """
    # Get ICP covariance matrices
    # Odom 1 lidar odometry
    odom1_icp, odom1_trans_cov, odom1_trans_cov_max, \
    odom1_trans_cov_avg, odom1_rot_cov, odom1_rot_cov_max, \
    odom1_rot_cov_avg, odom1_reject = parse_icp_cov(odom_1, type=type_1,
                                                  reject_thr=REJECT_THR)

    # Odom 2 lidar odometry
    odom2_icp, odom2_trans_cov, odom2_trans_cov_max, \
    odom2_trans_cov_avg, odom2_rot_cov, odom2_rot_cov_max, \
    odom2_rot_cov_avg, odom2_reject = parse_icp_cov(odom_2, type=type_2,
                                                    reject_thr=REJECT_THR)
    # Calculate relative poses
    (odom1_aligned,
     odom1_rel_poses) = relative_pose_processing.calc_rel_poses(aligned_1)
    (odom2_aligned,
     odom2_rel_poses) = relative_pose_processing.calc_rel_poses(aligned_2)

    # Compute weights for weighted estimate
    cov_t_odom1, cov_R_odom1 = compute_weights_euler(odom1_aligned)
    cov_t_odom2, cov_R_odom2 = compute_weights_euler(odom2_aligned)

    # Extract a single scalar from the rotation and translation covariances (here the maximum value)
    var_t_odom1 = extract_variance(cov_t_odom1, mode="max")
    var_R_odom1 = extract_variance(cov_R_odom1, mode="max")
    var_t_odom2 = extract_variance(cov_t_odom2, mode="max")
    var_R_odom2 = extract_variance(cov_R_odom2, mode="max")

    # Optimization (1) Instantiate a manifold
    translation_manifold = Euclidean(3)  # Translation vector
    so3 = Rotations(3)  # Rotation matrix
    manifold = Product((so3, translation_manifold))  # Instantiate manifold

    # Get initial guesses for our estimations
    if os.path.exists(PKL_POSES_PATH):  # Check to make sure path exists
        transforms_dict = load_transforms(
            PKL_POSES_PATH)  # Relative transforms

    # Map types to sensor names to access initial estimate relative transforms
    types2sensors = {"main": "velodyne", "front": "front", "rear": "rear"}

    # Now get initial guesses from the relative poses
    initial_guess_odom1_odom2 = transforms_dict["{}_{}".format(
        types2sensors[type_1], types2sensors[type_2])]
    # Print out all the initial estimates as poses
    print("INITIAL GUESS {} {}: \n {} \n".format(types2sensors[type_1],
                                                 types2sensors[type_2],
                                                 initial_guess_odom1_odom2))

    # Get rotation matrices for initial guesses
    R0_odom1_odom2, t0_odom1_odom2 = initial_guess_odom1_odom2[:3, :3], \
                                     initial_guess_odom1_odom2[:3, 3]
    X0_odom1_odom2 = (R0_odom1_odom2, t0_odom1_odom2)  # Pymanopt estimate
    print("INITIAL GUESS {} {}: \n R0: \n {} \n\n t0: \n {} \n".format(
        types2sensors[type_1], types2sensors[type_2], R0_odom1_odom2,
        t0_odom1_odom2))

    # Create KFold xval object to get training/validation indices
    kf = KFold(n_splits=K, random_state=None, shuffle=False)
    k = 0  # Set fold counter to 0

    # Dataset
    A = np.array(odom2_rel_poses)  # First set of poses
    B = np.array(odom1_rel_poses)  # Second set of poses
    N = len(A)
    assert len(A) == len(B)  # Sanity check to ensure odometry data matches
    r = np.logical_or(np.array(odom1_reject)[:N],
                      np.array(odom2_reject)[:N])  # Outlier rejection

    print("NUMBER OF CROSS-VALIDATION FOLDS: {}".format(K))

    # Iterate over the K cross-validation folds of the poses
    for train_index, test_index in kf.split(
            A):  # Perform K-fold cross-validation

        # Path for results from manifold optimization
        analysis_results_path = os.path.join(ANALYSIS_RESULTS_PATH,
                                             "k={}".format(k))
        final_estimates_path = os.path.join(FINAL_ESTIMATES_PATH,
                                            "k={}".format(k))
        odometry_plots_path = os.path.join(ODOMETRY_PLOTS_PATH,
                                           "k={}".format(k))

        # Make sure all paths exist; create them if they don't
        for path in [
                analysis_results_path, final_estimates_path,
                odometry_plots_path
        ]:
            check_dir(path)

        # Get training data
        A_train = A[train_index]
        B_train = B[train_index]
        N_train = min(A_train.shape[0], B_train.shape[0])
        r_train = r[train_index]
        print("FOLD NUMBER: {}, NUMBER OF TRAINING SAMPLES: {}".format(
            k, N_train))

        omega = np.max([var_R_odom1,
                        var_R_odom2])  # Take the maximum rotational variance across the two odometries
        rho = np.max([var_t_odom1,
                      var_t_odom2])  # Take the maximum translational variance across the two odometries

        cost_lambda = lambda x: cost(x, A_train, B_train, r_train, rho, omega,
                                     WEIGHTED)  # Create cost function
        problem = Problem(manifold=manifold,
                          cost=cost_lambda)  # Create problem
        solver = CustomSteepestDescent()  # Create custom solver
        X_opt = solver.solve(problem, x=X0_odom1_odom2)  # Solve problem
        print("Initial Guess for Main-Front Transformation: \n {}".format(
            initial_guess_odom1_odom2))
        print("Optimal solution between {} and {} "
              "reference frames: \n {}".format(types2sensors[type_1],
                                               types2sensors[type_2], X_opt))

        # Take intermediate values for plotting
        estimates_x = solver.estimates
        errors = solver.errors
        iters = solver.iterations

        # Metrics dictionary
        estimates_dict = {i: T for i, T in zip(iters, estimates_x)}
        error_dict = {i: e for i, e in zip(iters, errors)}

        # Save intermediate results to a pkl file
        estimates_fname = os.path.join(
            analysis_results_path,
            "estimates_{}_{}.pkl".format(types2sensors[type_1],
                                         types2sensors[type_2]))
        error_fname = os.path.join(
            analysis_results_path,
            "error_{}_{}.pkl".format(types2sensors[type_1],
                                     types2sensors[type_2]))

        # Save estimates to pickle file
        with open(estimates_fname, "wb") as pkl_estimates:
            pickle.dump(estimates_dict, pkl_estimates)

        # Save error to pickle file
        with open(error_fname, "wb") as pkl_error:
            pickle.dump(error_dict, pkl_error)

        # Calculate difference between initial guess and final
        X_opt_T = construct_pose(X_opt[0], X_opt[1].reshape((3, 1)))
        print("DIFFERENCE IN MATRICES: \n {}".format(
            np.subtract(X_opt_T, initial_guess_odom1_odom2)))

        # Compute the weighted RMSE (training/in-sample)
        train_rmse_init_weighted, train_rmse_final_weighted, train_rmse_init_R_weighted, \
        train_rmse_init_t_weighted, train_rmse_final_R_weighted, \
        train_rmse_final_t_weighted = compute_rmse_weighted(
            initial_guess_odom1_odom2, X_opt_T, A_train, B_train, rho, omega)

        # Compute the unweighted RMSE (training/in-sample)
        train_rmse_init_unweighted, train_rmse_final_unweighted, train_rmse_init_R_unweighted, \
        train_rmse_init_t_unweighted, train_rmse_final_R_unweighted, \
        train_rmse_final_t_unweighted = compute_rmse_unweighted(
            initial_guess_odom1_odom2, X_opt_T, A_train, B_train)

        # Concatenate all RMSE values for training/in-sample
        train_rmses = [
            train_rmse_init_unweighted, train_rmse_final_unweighted,
            train_rmse_init_weighted, train_rmse_final_weighted,
            train_rmse_init_R_unweighted, train_rmse_init_t_unweighted,
            train_rmse_final_R_unweighted, train_rmse_final_t_unweighted,
            train_rmse_init_R_weighted, train_rmse_init_t_weighted,
            train_rmse_final_R_weighted, train_rmse_final_t_weighted
        ]

        # Display and save RMSEs
        outpath = os.path.join(
            analysis_results_path,
            "train_rmse_{}_{}.txt".format(types2sensors[type_1],
                                          types2sensors[type_2]))
        display_and_save_rmse(train_rmses, outpath)

        # Get test data
        A_test = A[test_index]
        B_test = B[test_index]
        N_test = min(A_test.shape[0], B_test.shape[0])
        print("NUMBER OF TEST SAMPLES: {}".format(N_test))

        # Compute the weighted RMSE (testing/out-of-sample)
        test_rmse_init_weighted, test_rmse_final_weighted, test_rmse_init_R_weighted, \
        test_rmse_init_t_weighted, test_rmse_final_R_weighted, \
        test_rmse_final_t_weighted = compute_rmse_weighted(initial_guess_odom1_odom2,
                                                            X_opt_T, A_test, B_test, rho, omega)

        # Compute the unweighted RMSE (testing/out-of-sample)
        test_rmse_init_unweighted, test_rmse_final_unweighted, test_rmse_init_R_unweighted, \
        test_rmse_init_t_unweighted, test_rmse_final_R_unweighted, \
        test_rmse_final_t_unweighted = compute_rmse_unweighted(initial_guess_odom1_odom2,
                                                                X_opt_T, A_test, B_test)

        # Concatenate all RMSE values for testing/out-of-sample
        test_rmses = [
            test_rmse_init_unweighted, test_rmse_final_unweighted,
            test_rmse_init_weighted, test_rmse_final_weighted,
            test_rmse_init_R_unweighted, test_rmse_init_t_unweighted,
            test_rmse_final_R_unweighted, test_rmse_final_t_unweighted,
            test_rmse_init_R_weighted, test_rmse_init_t_weighted,
            test_rmse_final_R_weighted, test_rmse_final_t_weighted
        ]

        # Display and save RMSEs
        outpath = os.path.join(
            analysis_results_path,
            "test_rmse_{}_{}.txt".format(types2sensors[type_1],
                                         types2sensors[type_2]))
        display_and_save_rmse(test_rmses, outpath)

        # Save final estimates
        final_estimate_outpath = os.path.join(
            final_estimates_path, "{}_{}.txt".format(types2sensors[type_1],
                                                     types2sensors[type_2]))
        np.savetxt(final_estimate_outpath, X_opt_T)

        # Finally, increment k
        k += 1
Example #18
0
def main():
    """Main function to run nonlinear manifold optimization on SE(3) to estimate
    an optimal relative pose transformation between coordinate frames given by
    the different lidar sensors."""
    # Extract and process the CSVs
    main_odometry = relative_pose_processing.process_df(MAIN_ODOM_CSV)
    front_odometry = relative_pose_processing.process_df(FRONT_ODOM_CSV)
    rear_odometry = relative_pose_processing.process_df(REAR_ODOM_CSV)

    # Process poses
    (main_aligned, front_aligned,
     rear_aligned) = relative_pose_processing.align_df(
         [main_odometry, front_odometry, rear_odometry])

    # Get ICP covariance matrices
    # Main lidar odometry
    main_icp, main_trans_cov, main_trans_cov_max, \
        main_trans_cov_avg, main_rot_cov, main_rot_cov_max, \
        main_rot_cov_avg, main_reject = parse_icp_cov(main_odometry, type="main",
                                                      reject_thr=REJECT_THR)

    # Front lidar odometry
    front_icp, front_trans_cov, front_trans_cov_max, \
        front_trans_cov_avg, front_rot_cov, front_rot_cov_max, \
        front_rot_cov_avg, front_reject = parse_icp_cov(front_odometry, type="front",
                                                        reject_thr=REJECT_THR)

    # Rear lidar odometry
    rear_icp, rear_trans_cov, rear_trans_cov_max, \
        rear_trans_cov_avg, rear_rot_cov, rear_rot_cov_max, \
        rear_rot_cov_avg, rear_reject = parse_icp_cov(rear_odometry, type="rear",
                                                      reject_thr=REJECT_THR)

    # Calculate relative poses
    (main_aligned,
     main_rel_poses) = relative_pose_processing.calc_rel_poses(main_aligned)
    (front_aligned,
     front_rel_poses) = relative_pose_processing.calc_rel_poses(front_aligned)
    (rear_aligned,
     rear_rel_poses) = relative_pose_processing.calc_rel_poses(rear_aligned)

    cov_t_main, cov_R_main = compute_weights_euler(main_aligned)
    cov_t_front, cov_R_front = compute_weights_euler(front_aligned)
    cov_t_rear, cov_R_rear = compute_weights_euler(rear_aligned)

    # Extract a single scalar variance (maximum) from the rotation and translation covariances
    var_t_main = extract_variance(cov_t_main, mode="max")
    var_R_main = extract_variance(cov_R_main, mode="max")
    var_t_front = extract_variance(cov_t_front, mode="max")
    var_R_front = extract_variance(cov_R_front, mode="max")
    var_t_rear = extract_variance(cov_t_rear, mode="max")
    var_R_rear = extract_variance(cov_R_rear, mode="max")

    # Optimization (1) Instantiate a manifold
    translation_manifold = Euclidean(3)  # Translation vector
    so3 = Rotations(3)  # Rotation matrix
    manifold = Product((so3, translation_manifold))  # Instantiate manifold

    # Get initial guesses for our estimates
    initial_poses = {}
    if os.path.exists(PKL_POSES_PATH):  # Check to make sure path exists
        transforms_dict = load_transforms(
            PKL_POSES_PATH)  # Loads relative transforms

    # Now get initial guesses from the relative poses
    initial_guess_main_front = transforms_dict[
        "velodyne_front"]  # Relative transform from main to front (T^{V}_{F})
    initial_guess_main_rear = transforms_dict[
        "velodyne_rear"]  # Relative transform from main to rear (T^{V}_{R})
    initial_guess_front_rear = np.linalg.inv(
        initial_guess_main_front
    ) @ initial_guess_main_rear  # Relative transform from front to rear, chained as (T^{V}_{F})^{-1} T^{V}_{R}
    direct_initial_guess_front_rear = transforms_dict[
        "direct_front_rear"]  # Transform computed directly from front to rear

    # Print out all the initial estimates as poses
    print(
        "INITIAL GUESS MAIN FRONT: \n {} \n".format(initial_guess_main_front))
    print("INITIAL GUESS MAIN REAR: \n {} \n".format(initial_guess_main_rear))
    print(
        "INITIAL GUESS FRONT REAR: \n {} \n".format(initial_guess_front_rear))
    print("INITIAL GUESS DIRECT FRONT REAR: \n {} \n".format(
        direct_initial_guess_front_rear))

    # Extract rotation and translation for each initial guess
    R0_main_front, t0_main_front = initial_guess_main_front[:3, :3], \
                                   initial_guess_main_front[:3, 3]
    X0_main_front = (R0_main_front, t0_main_front)
    print("INITIAL GUESS MAIN FRONT: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_main_front, t0_main_front))

    R0_main_rear, t0_main_rear = initial_guess_main_rear[:3, :3], \
                                 initial_guess_main_rear[:3, 3]
    X0_main_rear = (R0_main_rear, t0_main_rear)
    print("INITIAL GUESS MAIN REAR: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_main_rear, t0_main_rear))

    R0_front_rear, t0_front_rear = initial_guess_front_rear[:3, :3], \
                                   initial_guess_front_rear[:3, 3]
    X0_front_rear = (R0_front_rear, t0_front_rear)
    print("INITIAL GUESS FRONT REAR: \n R0: \n {} \n\n t0: \n {} \n".format(
        R0_front_rear, t0_front_rear))

    ######################## MAIN FRONT CALIBRATION ################################
    # Carry out optimization for main-front homogeneous transformations
    ### PARAMETERS ###
    A = np.array(front_rel_poses)  # First set of poses
    B = np.array(main_rel_poses)  # Second set of poses
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(main_reject[:N]), np.array(
        front_reject[:N]))  # If either has high variance, reject the sample
    omega = np.max([var_R_main,
                    var_R_front])  # Take the maximum rotational variance across the two odometries
    rho = np.max([var_t_main,
                  var_t_front])  # Take the maximum translational variance across the two odometries
    ### PARAMETERS ###

    cost_main_front = lambda x: cost(x, A, B, r, rho, omega, WEIGHTED)
    problem_main_front = Problem(
        manifold=manifold, cost=cost_main_front
    )  # (2a) Compute the optimization between main and front
    solver_main_front = CustomSteepestDescent(
    )  # (3) Instantiate a Pymanopt solver
    Xopt_main_front = solver_main_front.solve(problem_main_front,
                                              x=X0_main_front)
    print("Initial Guess for Main-Front Transformation: \n {}".format(
        initial_guess_main_front))
    print("Optimal solution between main and front reference frames: \n {}".
          format(Xopt_main_front))

    # Take intermediate values for plotting
    estimates_x_main_front = solver_main_front.estimates
    errors_main_front = solver_main_front.errors
    iters_main_front = solver_main_front.iterations

    # Metrics dictionary
    estimates_dict_main_front = {
        i: T
        for i, T in zip(iters_main_front, estimates_x_main_front)
    }
    error_dict_main_front = {
        i: e
        for i, e in zip(iters_main_front, errors_main_front)
    }

    # Save intermediate results to a pkl file
    estimates_fname_main_front = os.path.join(ANALYSIS_RESULTS_PATH,
                                              "estimates_main_front.pkl")
    error_fname_main_front = os.path.join(ANALYSIS_RESULTS_PATH,
                                          "error_main_front.pkl")

    # Save estimates to pickle file
    with open(estimates_fname_main_front, "wb") as pkl_estimates:
        pickle.dump(estimates_dict_main_front, pkl_estimates)

    # Save error to pickle file
    with open(error_fname_main_front, "wb") as pkl_error:
        pickle.dump(error_dict_main_front, pkl_error)

    # Calculate difference between initial guess and final
    XOpt_T_main_front = construct_pose(Xopt_main_front[0],
                                       Xopt_main_front[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T_main_front, initial_guess_main_front)))

    # Compute the weighted and unweighted RMSE
    rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted, \
    rmse_init_t_weighted, rmse_final_R_weighted, \
    rmse_final_t_weighted = compute_rmse_weighted(initial_guess_main_front,
                                                  XOpt_T_main_front, A, B, rho,
                                                  omega)
    rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted, \
    rmse_init_t_unweighted, rmse_final_R_unweighted, \
    rmse_final_t_unweighted = compute_rmse_unweighted(initial_guess_main_front,
                                                      XOpt_T_main_front, A, B)
    rmses = [
        rmse_init_unweighted, rmse_final_unweighted, rmse_init_weighted,
        rmse_final_weighted, rmse_init_R_unweighted, rmse_init_t_unweighted,
        rmse_final_R_unweighted, rmse_final_t_unweighted, rmse_init_R_weighted,
        rmse_init_t_weighted, rmse_final_R_weighted, rmse_final_t_weighted
    ]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH, "main_front_rmse.txt")
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH,
                                          "main_front_final.txt")
    np.savetxt(final_estimate_outpath, XOpt_T_main_front)
    ################################################################################

    ######################## MAIN REAR CALIBRATION #################################
    ### PARAMETERS ###
    A = np.array(rear_rel_poses)  # First set of poses
    B = np.array(main_rel_poses)  # Second set of poses
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(main_reject[:N]), np.array(
        rear_reject[:N]))  # If either has high variance, reject the sample
    omega = np.max([var_R_main,
                    var_R_rear])  # Take the maximum rotational variance across the two odometries
    rho = np.max([var_t_main,
                  var_t_rear])  # Take the maximum translational variance across the two odometries
    ### PARAMETERS ###

    cost_main_rear = lambda x: cost(x, A, B, r, rho, omega, WEIGHTED)
    # Carry out optimization for main-rear homogeneous transformations
    problem_main_rear = Problem(
        manifold=manifold, cost=cost_main_rear
    )  # (2a) Compute the optimization between main and rear
    solver_main_rear = CustomSteepestDescent(
    )  # (3) Instantiate a Pymanopt solver
    Xopt_main_rear = solver_main_rear.solve(problem_main_rear, x=X0_main_rear)
    print("Initial Guess for Main-Rear Transformation: \n {}".format(
        initial_guess_main_rear))
    print("Optimal solution between main and rear reference frames: \n {}".
          format(Xopt_main_rear))

    # Take intermediate values for plotting
    estimates_x_main_rear = solver_main_rear.estimates
    errors_main_rear = solver_main_rear.errors
    iters_main_rear = solver_main_rear.iterations

    # Metrics dictionary
    estimates_dict_main_rear = {
        i: T
        for i, T in zip(iters_main_rear, estimates_x_main_rear)
    }
    error_dict_main_rear = {
        i: e
        for i, e in zip(iters_main_rear, errors_main_rear)
    }

    # Save intermediate results to a pkl file
    estimates_fname_main_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                             "estimates_main_rear.pkl")
    error_fname_main_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                         "error_main_rear.pkl")

    # Save estimates to pickle file
    with open(estimates_fname_main_rear, "wb") as pkl_estimates:
        pickle.dump(estimates_dict_main_rear, pkl_estimates)

    # Save error to pickle file
    with open(error_fname_main_rear, "wb") as pkl_error:
        pickle.dump(error_dict_main_rear, pkl_error)

    # Calculate difference between initial guess and final
    XOpt_T_main_rear = construct_pose(Xopt_main_rear[0],
                                      Xopt_main_rear[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T_main_rear, initial_guess_main_rear)))

    # Compute the weighted and unweighted RMSE
    rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted, \
    rmse_init_t_weighted, rmse_final_R_weighted, \
    rmse_final_t_weighted = compute_rmse_weighted(initial_guess_main_rear,
                                                  XOpt_T_main_rear, A, B, rho,
                                                  omega)
    rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted, \
    rmse_init_t_unweighted, rmse_final_R_unweighted, \
    rmse_final_t_unweighted = compute_rmse_unweighted(initial_guess_main_rear,
                                                      XOpt_T_main_rear, A, B)
    rmses = [
        rmse_init_unweighted, rmse_final_unweighted, rmse_init_weighted,
        rmse_final_weighted, rmse_init_R_unweighted, rmse_init_t_unweighted,
        rmse_final_R_unweighted, rmse_final_t_unweighted, rmse_init_R_weighted,
        rmse_init_t_weighted, rmse_final_R_weighted, rmse_final_t_weighted
    ]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH, "main_rear_rmse.txt")
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH,
                                          "main_rear_final.txt")
    np.savetxt(final_estimate_outpath, XOpt_T_main_rear)
    ################################################################################

    ######################## FRONT REAR CALIBRATION ################################
    ### PARAMETERS ###
    A = np.array(rear_rel_poses)  # First set of poses
    B = np.array(front_rel_poses)  # Second set of poses
    N = min(A.shape[0], B.shape[0])
    r = np.logical_or(np.array(front_reject[:N]), np.array(
        rear_reject[:N]))  # If either has high variance, reject the sample
    omega = np.max([var_R_front,
                    var_R_rear])  # Take the maximum rotational variance across the two odometries
    rho = np.max([var_t_front,
                  var_t_rear])  # Take the maximum translational variance across the two odometries
    ### PARAMETERS ###

    cost_front_rear = lambda x: cost(x, A, B, r, rho, omega, WEIGHTED)
    # Carry out optimization for front-rear homogeneous transformations
    problem_front_rear = Problem(
        manifold=manifold, cost=cost_front_rear
    )  # (2a) Compute the optimization between front and rear
    solver_front_rear = CustomSteepestDescent(
    )  # (3) Instantiate a Pymanopt solver
    Xopt_front_rear = solver_front_rear.solve(problem_front_rear,
                                              x=X0_front_rear)
    print("Initial Guess for Front-Rear Transformation: \n {}".format(
        initial_guess_front_rear))
    print("Optimal solution between front and rear reference frames: \n {}".
          format(Xopt_front_rear))

    # Take intermediate values for plotting
    estimates_x_front_rear = solver_front_rear.estimates
    errors_front_rear = solver_front_rear.errors
    iters_front_rear = solver_front_rear.iterations

    # Metrics dictionary
    estimates_dict_front_rear = {
        i: T
        for i, T in zip(iters_front_rear, estimates_x_front_rear)
    }
    error_dict_front_rear = {
        i: e
        for i, e in zip(iters_front_rear, errors_front_rear)
    }

    # Save intermediate results to a pkl file
    estimates_fname_front_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                              "estimates_front_rear.pkl")
    error_fname_front_rear = os.path.join(ANALYSIS_RESULTS_PATH,
                                          "error_front_rear.pkl")

    # Save estimates to pickle file
    with open(estimates_fname_front_rear, "wb") as pkl_estimates:
        pickle.dump(estimates_dict_front_rear, pkl_estimates)

    # Save error to pickle file
    with open(error_fname_front_rear, "wb") as pkl_error:
        pickle.dump(error_dict_front_rear, pkl_error)

    # Calculate difference between initial guess and final
    XOpt_T_front_rear = construct_pose(Xopt_front_rear[0],
                                       Xopt_front_rear[1].reshape((3, 1)))
    print("DIFFERENCE IN MATRICES: \n {}".format(
        np.subtract(XOpt_T_front_rear, initial_guess_front_rear)))

    # Compute the weighted and unweighted RMSE
    rmse_init_weighted, rmse_final_weighted, rmse_init_R_weighted, \
    rmse_init_t_weighted, rmse_final_R_weighted, \
    rmse_final_t_weighted = compute_rmse_weighted(initial_guess_front_rear,
                                                  XOpt_T_front_rear, A, B, rho,
                                                  omega)
    rmse_init_unweighted, rmse_final_unweighted, rmse_init_R_unweighted, \
    rmse_init_t_unweighted, rmse_final_R_unweighted, \
    rmse_final_t_unweighted = compute_rmse_unweighted(initial_guess_front_rear,
                                                      XOpt_T_front_rear, A, B)
    rmses = [
        rmse_init_unweighted, rmse_final_unweighted, rmse_init_weighted,
        rmse_final_weighted, rmse_init_R_unweighted, rmse_init_t_unweighted,
        rmse_final_R_unweighted, rmse_final_t_unweighted, rmse_init_R_weighted,
        rmse_init_t_weighted, rmse_final_R_weighted, rmse_final_t_weighted
    ]

    # Display and save RMSEs
    outpath = os.path.join(ANALYSIS_RESULTS_PATH, "front_rear_rmse.txt")
    display_and_save_rmse(rmses, outpath)

    # Save final estimates
    final_estimate_outpath = os.path.join(FINAL_ESTIMATES_PATH,
                                          "front_rear_final.txt")
    np.savetxt(final_estimate_outpath, XOpt_T_front_rear)
    ################################################################################

    # Display all results
    print("_________________________________________________________")
    print("_____________________ALL RESULTS_________________________")
    print("_________________________________________________________")
    print("Initial Guess for Main-Front Transformation: \n {}".format(
        initial_guess_main_front))
    print("Optimal solution between main and front reference frames: \n {}".
          format(Xopt_main_front))
    print("_________________________________________________________")
    print("Initial Guess for Main-Rear Transformation: \n {}".format(
        initial_guess_main_rear))
    print("Optimal solution between main and rear reference frames: \n {}".
          format(Xopt_main_rear))
    print("_________________________________________________________")
    print("Initial Guess for Front-Rear Transformation: \n {}".format(
        initial_guess_front_rear))
    print("Optimal solution between front and rear reference frames: \n {}".
          format(Xopt_front_rear))
    print("_________________________________________________________")
Example #19
0
            # Invert the order of the columns so that age is no longer the first Bernoulli variable
            #***************************************************************************
            '''
            train[['age', 'workclass', 'fnlwgt', 'education.num', 'marital.status',
                   'occupation', 'relationship', 'race', 'capital.gain',
                   'capital.loss', 'hours.per.week', 'native.country', 'income', 'sex']]
            
            
            var_distrib = np.array(['continuous', 'categorical', 'continuous',\
                        'ordinal', 'categorical', 'categorical', 'categorical',\
                        'categorical', 'ordinal', 'ordinal',\
                        'continuous', 'categorical', 'bernoulli', 'bernoulli']) 
            '''

            p_new = len(var_distrib)
            cat_features = np.logical_or(var_distrib == 'categorical',
                                         var_distrib == 'ordinal')

            #*****************************************************************
            # Formatting the data
            #*****************************************************************

            # Encode categorical data
            for col_idx, colname in enumerate(train.columns):
                if var_distrib[col_idx] == 'categorical':
                    le = LabelEncoder()

                    # Convert them into numerical values
                    train[colname] = le.fit_transform(train[colname])
                    le_dict[colname] = deepcopy(le)

            # Encode binary data
Example #20
0
    def fit(self,
            x=None,
            c=None,
            n=None,
            t=None,
            how='MLE',
            offset=False,
            zi=False,
            lfp=False,
            tl=None,
            tr=None,
            xl=None,
            xr=None,
            fixed=None,
            heuristic='Turnbull',
            init=[],
            rr='y',
            on_d_is_0=False,
            turnbull_estimator='Fleming-Harrington'):
        r"""

        The central feature of SurPyval's capability. This function aims to provide an API that mimics the
        simplicity of the scipy API. That is, to use a simple :code:`fit()` call, with as many or as few
        parameters as needed.

        Parameters
        ----------

        x : array like, optional
            Array of observations of the random variables. If x is :code:`None`, xl and xr must be provided.
        c : array like, optional
            Array of censoring flag. -1 is left censored, 0 is observed, 1 is right censored, and 2 is intervally
            censored. If not provided will assume all values are observed.
        n : array like, optional
            Array of counts for each x. If the data is provided as counts, this can be used. If :code:`None`,
            each observation is assumed to have a count of 1.
        t : 2D-array like, optional
            2D array like of the left and right values at which the respective observation was truncated. If
            not provided it assumes that no truncation occurs.
        how : {'MLE', 'MPP', 'MOM', 'MSE', 'MPS'}, optional
            Method to estimate parameters, these are:

                - MLE : Maximum Likelihood Estimation
                - MPP : Method of Probability Plotting
                - MOM : Method of Moments
                - MSE : Mean Square Error
                - MPS : Maximum Product Spacing

        offset : boolean, optional
            If :code:`True` finds the shifted distribution. If not provided assumes not a shifted distribution.
            Only works with distributions that are supported on the half-real line.

        tl : array like or scalar, optional
            Values of left truncation for observations. If it is a scalar value assumes each observation is
            left truncated at the value. If an array, it is the respective 'late entry' of the observation

        tr : array like or scalar, optional
            Values of right truncation for observations. If it is a scalar value assumes each observation is
            right truncated at the value. If an array, it is the respective right truncation value for each
            observation

        xl : array like, optional
            Array like of the left array for 2-dimensional input of x. This is useful for data that is all
            intervally censored. Must be used with the :code:`xr` input.

        xr : array like, optional
            Array like of the right array for 2-dimensional input of x. This is useful for data that is all
            intervally censored. Must be used with the :code:`xl` input.

        fixed : dict, optional
            Dictionary of parameters and their values to fix. Fixes parameter by name.

        heuristic : {'"Blom", "Median", "ECDF", "Modal", "Midpoint", "Mean", "Weibull", "Benard", "Beard", "Hazen", "Gringorten", "None", "Tukey", "DPW", "Fleming-Harrington", "Kaplan-Meier", "Nelson-Aalen", "Filliben", "Larsen", "Turnbull"}
            Plotting method to use, if using the probability plotting, MPP, method.

        init : array like, optional
            Initial guess of parameters. Useful if the method is failing.

        rr : ('y', 'x')
            The dimension on which to minimise the spacing between the line and the observation.
            If 'y' the mean square error between the line and vertical distance to each point is minimised.
            If 'x' the mean square error between the line and horizontal distance to each point is minimised.

        on_d_is_0 : boolean, optional
            For the case when using MPP and the highest value is right censored, you can choose whether to
            include this value in the regression analysis. That is, if :code:`False`, all values
            where there are 0 deaths are excluded from the regression. If :code:`True` all values
            regardless of whether there is a death or not are included in the regression.

        turnbull_estimator : ('Nelson-Aalen', 'Kaplan-Meier', or 'Fleming-Harrington'), str, optional
            If using the Turnbull heuristic, you can elect to use either the KM, NA, or FH estimator with 
            the Turnbull estimates of r, and d. Defaults to FH.

        Returns
        -------

        model : Parametric
            A parametric model with the fitted parameters and methods for all functions of the distribution using the 
            fitted parameters.

        Examples
        --------
        >>> from surpyval import Weibull
        >>> import numpy as np
        >>> x = Weibull.random(100, 10, 4)
        >>> model = Weibull.fit(x)
        >>> print(model)
        Parametric SurPyval Model
        =========================
        Distribution        : Weibull
        Fitted by           : MLE
        Parameters          :
             alpha: 10.551521182640098
              beta: 3.792549834495306
        >>> Weibull.fit(x, how='MPS', fixed={'alpha' : 10})
        Parametric SurPyval Model
        =========================
        Distribution        : Weibull
        Fitted by           : MPS
        Parameters          :
             alpha: 10.0
              beta: 3.4314657446866836
        >>> Weibull.fit(xl=x-1, xr=x+1, how='MPP')
        Parametric SurPyval Model
        =========================
        Distribution        : Weibull
        Fitted by           : MPP
        Parameters          :
             alpha: 9.943092756713078
              beta: 8.613016934518258
        >>> c = np.zeros_like(x)
        >>> c[x > 13] = 1
        >>> x[x > 13] = 13
        >>> c = c[x > 6]
        >>> x = x[x > 6]
        >>> Weibull.fit(x=x, c=c, tl=6)
        Parametric SurPyval Model
        =========================
        Distribution        : Weibull
        Fitted by           : MLE
        Parameters          :
             alpha: 10.363725328793413
              beta: 4.9886821457305865
        """

        if offset and self.name in [
                'Normal', 'Beta', 'Uniform', 'Gumbel', 'Logistic'
        ]:
            raise ValueError(
                '{dist} distribution cannot be offset'.format(dist=self.name))

        if how not in PARA_METHODS:
            raise ValueError('"how" must be one of: ' + str(PARA_METHODS))

        if how == 'MPP' and self.name == 'ExpoWeibull':
            raise ValueError(
                'ExpoWeibull distribution does not work with probability plot fitting'
            )

        if t is not None and how == 'MPS':
            raise ValueError(
                'Maximum product spacing doesn\'t yet support truncation')

        if t is not None and how == 'MSE':
            raise NotImplementedError(
                'Mean square error doesn\'t yet support truncation')

        if t is not None and how == 'MOM':
            raise ValueError(
                'Method of moments doesn\'t support truncation')

        if (lfp or zi) & (how != 'MLE'):
            raise ValueError(
                'Limited failure or zero-inflated models can only be made with MLE'
            )

        if (zi & (self.support[0] != 0)):
            raise ValueError(
                "zero-inflated models can only work with models starting at 0")

        x, c, n, t = surpyval.xcnt_handler(x=x,
                                           c=c,
                                           n=n,
                                           t=t,
                                           tl=tl,
                                           tr=tr,
                                           xl=xl,
                                           xr=xr)

        if surpyval.utils.check_no_censoring(c) and (how == 'MOM'):
            raise ValueError('Method of moments doesn\'t support censoring')

        if (surpyval.utils.no_left_or_int(c)) and (how == 'MPP') and (
                not heuristic == 'Turnbull'):
            raise ValueError(
                'Probability plotting estimation with left or interval censoring only works with Turnbull heuristic'
            )

        if (heuristic == 'Turnbull') & (not ((-1 in c) or (2 in c))) & (
            (~np.isfinite(t[:, 1])).any()):
            # The Turnbull method is extremely memory intensive.
            # So if no left or interval censoring and no right-truncation
            # then this is equivalent.
            heuristic = turnbull_estimator

        if (not offset) & (not zi):
            if x.ndim == 2:
                if ((x[:, 0] <= self.support[0]) & (c == 0)).any():
                    raise ValueError(
                        "Observed values must be in support of distribution; are some of your observed values 0, -Inf, or Inf?"
                    )
            else:
                if ((x <= self.support[0]) & (c == 0)).any():
                    raise ValueError(
                        "Observed values must be in support of distribution; are some of your observed values 0, -Inf, or Inf?"
                    )

        # Passed checks
        data = {'x': x, 'c': c, 'n': n, 't': t}

        model = para.Parametric(self, how, data, offset, lfp, zi)
        fitting_info = {}

        if how != 'MPP':
            transform, inv_trans, funcs, inv_f = bounds_convert(
                x, model.bounds)
            const, fixed_idx, not_fixed = fix_idx_and_function(
                fixed, model.param_map, funcs)

            fitting_info['transform'] = transform
            fitting_info['inv_trans'] = inv_trans
            fitting_info['funcs'] = funcs
            fitting_info['inv_f'] = inv_f

            fitting_info['const'] = const
            fitting_info['fixed_idx'] = fixed_idx
            fitting_info['not_fixed'] = not_fixed

            # Need a better general fitter to include offset
            if init == []:
                if self.name in ['Gumbel', 'Beta', 'Normal', 'Uniform']:
                    init = np.array(self._parameter_initialiser(x, c, n))
                else:

                    if x.ndim == 2:
                        init_mask = np.logical_or(x[:, 0] <= self.support[0],
                                                  x[:, 0] >= self.support[1])
                        init_mask = ~np.logical_and(init_mask, c == 0)
                        xl = x[init_mask, 0]
                        xr = x[init_mask, 1]
                        x_init = np.vstack([xl, xr]).T
                    else:
                        init_mask = np.logical_or(x <= self.support[0],
                                                  x >= self.support[1])
                        init_mask = ~np.logical_and(init_mask, c == 0)

                        x_init = x[init_mask]
                    c_init = c[init_mask]
                    n_init = n[init_mask]

                    init = np.array(
                        self._parameter_initialiser(x_init,
                                                    c_init,
                                                    n_init,
                                                    offset=offset))

                    if offset:
                        init[0] = x.min() - 1.

                if lfp:
                    _, _, _, F = nonp.plotting_positions(
                        x, c, n, heuristic='Nelson-Aalen')

                    max_F = np.max(F)

                    if max_F > 0.5:
                        init = np.concatenate([init, [0.99]])
                    else:
                        init = np.concatenate(
                            [init_from_bounds(self), [max_F]])

                if zi:
                    init = np.concatenate(
                        [init, [(n[x == 0]).sum() / n.sum()]])

            init = transform(init)
            init = init[not_fixed]
            fitting_info['init'] = init
        else:
            # Probability plotting method does not need an initial estimate
            fitting_info['rr'] = rr
            fitting_info['heuristic'] = heuristic
            fitting_info['on_d_is_0'] = on_d_is_0
            fitting_info['turnbull_estimator'] = turnbull_estimator
            fitting_info['init'] = None

        model.fitting_info = fitting_info

        results = METHOD_FUNC_DICT[how](model)

        for k, v in results.items():
            setattr(model, k, v)

        if hasattr(model, 'params'):
            for k, v in zip(self.param_names, model.params):
                setattr(model, k, v)

        return model
Example #21
0
def fit_weights_and_save(
        weights_file,
        ca_data_file='rs_vm_denoise_200605.npy',
        opto_silencing_data_file='vip_halo_data_for_sim.npy',
        opto_activation_data_file='vip_chrimson_data_for_sim.npy',
        constrain_wts=None,
        allow_var=True,
        fit_s02=True,
        constrain_isn=True,
        tv=False,
        l2_penalty=0.01,
        init_noise=0.1,
        init_W_from_lsq=False,
        init_W_from_lbfgs=False,
        scale_init_by=1,
        init_W_from_file=False,
        init_file=None,
        correct_Eta=False,
        init_Eta_with_s02=False,
        init_Eta12_with_dYY=False,
        use_opto_transforms=False,
        share_residuals=False,
        stimwise=False,
        simulate1=True,
        simulate2=False,
        help_constrain_isn=True,
        ignore_halo_vip=False,
        verbose=True,
        free_amplitude=False,
        norm_opto_transforms=False,
        zero_extra_weights=None,
        allow_s2=True):

    nsize, ncontrast = 6, 6

    npfile = np.load(ca_data_file, allow_pickle=True)[(
    )]  #,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True)
    rs = npfile['rs']
    #rs_denoise = npfile['rs_denoise']

    nsize, ncontrast, ndir = 6, 6, 8
    #ori_dirs = [[0,4],[2,6]] #[[0,4],[1,3,5,7],[2,6]]
    ori_dirs = [[0, 1, 2, 3, 4, 5, 6, 7]]
    nT = len(ori_dirs)
    nS = len(rs[0])

    def sum_to_1(r):
        R = r.reshape((r.shape[0], -1))
        #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        R = R / np.nansum(R, axis=1)[:, np.newaxis]  # changed 8/28
        return R

    def norm_to_mean(r):
        R = r.reshape((r.shape[0], -1))
        R = R / np.nanmean(R[:, ~np.isnan(R.sum(0))], axis=1)[:, np.newaxis]
        return R
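    # Note (inferred from the code above): sum_to_1 normalizes each cell's
    # flattened response to sum to 1 across all conditions, while norm_to_mean
    # divides by the mean over the conditions that contain no NaNs in any cell.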

    Rs = [[None, None] for i in range(len(rs))]
    Rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]
    rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]

    for iR, r in enumerate(rs):  #rs_denoise):
        #print(iR)
        for ialign in range(nS):
            #Rs[iR][ialign] = r[ialign][:,:nsize,:]
            #sm = np.nanmean(np.nansum(np.nansum(Rs[iR][ialign],1),1))
            #Rs[iR][ialign] = Rs[iR][ialign]/sm
            #print('frac isnan Rs %d,%d: %f'%(iR,ialign,np.isnan(r[ialign]).mean()))
            Rs[iR][ialign] = sum_to_1(r[ialign][:, :nsize, :])
    #         Rs[iR][ialign] = von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir)))

    kernel = np.ones((1, 2, 2))
    kernel = kernel / kernel.sum()

    for iR, r in enumerate(rs):
        for ialign in range(nS):
            for iori in range(nT):
                #print('this Rs shape: '+str(Rs[iR][ialign].shape))
                #print('this Rs reshaped shape: '+str(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]].shape))
                #print('this Rs max percent nan: '+str(np.isnan(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]]).mean(-1).max()))
                Rso[iR][ialign][iori] = np.nanmean(
                    Rs[iR][ialign].reshape(
                        (-1, nsize, ncontrast, ndir))[:, :, :, ori_dirs[iori]],
                    -1)
                Rso[iR][ialign][iori][:, :, 0] = np.nanmean(
                    Rso[iR][ialign][iori][:, :, 0],
                    1)[:, np.newaxis]  # average 0 contrast values
                #print('frac isnan pre-conv Rso %d,%d,%d: %f'%(iR,ialign,iori,np.isnan(Rso[iR][ialign][iori]).mean()))
                Rso[iR][ialign][iori][:, 1:, 1:] = ssi.convolve(
                    Rso[iR][ialign][iori], kernel, 'valid')
                Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(
                    Rso[iR][ialign][iori].shape[0], -1)
                #print('frac isnan Rso %d,%d,%d: %f'%(iR,ialign,iori,np.isnan(Rso[iR][ialign][iori]).mean()))
                #print('sum of Rso isnan: '+str(np.isnan(Rso[iR][ialign][iori]).sum(1)))
                #Rso[iR][ialign][iori] = Rso[iR][ialign][iori]/np.nanmean(Rso[iR][ialign][iori],-1)[:,np.newaxis]

    def set_bound(bd, code, val=0):
        # set bounds to 0 where 0s occur in 'code'
        for iitem in range(len(bd)):
            bd[iitem][code[iitem]] = val

    nN = 36
    nS = 2
    nP = 2
    nT = 1
    nQ = 4

    # code for bounds: 0 , constrained to 0
    # +/-1 , constrained to +/-1
    # 1.5, constrained to [0,1]
    # 2 , constrained to [0,inf)
    # -2 , constrained to (-inf,0]
    # 3 , unconstrained

    Wmx_bounds = 3 * np.ones((nP, nQ), dtype=int)
    Wmx_bounds[0, :] = 2  # L4 PCs are excitatory
    Wmx_bounds[0, 1] = 0  # SSTs don't receive L4 input

    if allow_var:
        Wsx_bounds = 3 * np.ones(
            Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
        Wsx_bounds[0, 1] = 0
    else:
        Wsx_bounds = np.zeros(
            Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)

    Wmy_bounds = 3 * np.ones((nQ, nQ), dtype=int)
    Wmy_bounds[0, :] = 2  # PCs are excitatory
    Wmy_bounds[1:, :] = -2  # all the cell types except PCs are inhibitory
    Wmy_bounds[1, 1] = 0  # SSTs don't inhibit themselves
    # Wmy_bounds[3,1] = 0 # PVs are allowed to inhibit SSTs, consistent with Hillel's unpublished results, but not consistent with Pfeffer et al.
    Wmy_bounds[2, 0] = 0  # VIPs don't inhibit L2/3 PCs. According to Pfeffer et al., only L5 PCs were found to get VIP inhibition

    if zero_extra_weights is not None:
        Wmx_bounds[zero_extra_weights[0]] = 0
        Wmy_bounds[zero_extra_weights[1]] = 0

    if allow_var:
        Wsy_bounds = 3 * np.ones(
            Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)
        Wsy_bounds[1, 1] = 0
        Wsy_bounds[3, 1] = 0
        Wsy_bounds[2, 0] = 0
    else:
        Wsy_bounds = np.zeros(
            Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)

    if constrain_wts is not None:
        for wt in constrain_wts:
            Wmy_bounds[wt[0], wt[1]] = 0
            Wsy_bounds[wt[0], wt[1]] = 0

    def tile_nS_nT_nN(kernel):
        row = np.concatenate([kernel for idim in range(nS * nT)],
                             axis=0)[np.newaxis, :]
        tiled = np.concatenate([row for irow in range(nN)], axis=0)
        return tiled
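    # Shape note: for a kernel of shape (nQ,), `row` has shape (1, nS*nT*nQ) and
    # the returned array has shape (nN, nS*nT*nQ), i.e. the same per-population
    # bound code repeated for every stimulus condition.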

    def set_bounds_by_code(lb, ub, bdlist):
        set_bound(lb, [bd == 0 for bd in bdlist], val=0)
        set_bound(ub, [bd == 0 for bd in bdlist], val=0)

        set_bound(lb, [bd == 2 for bd in bdlist], val=0)

        set_bound(ub, [bd == -2 for bd in bdlist], val=0)

        set_bound(lb, [bd == 1 for bd in bdlist], val=1)
        set_bound(ub, [bd == 1 for bd in bdlist], val=1)

        set_bound(lb, [bd == 1.5 for bd in bdlist], val=0)
        set_bound(ub, [bd == 1.5 for bd in bdlist], val=1)

        set_bound(lb, [bd == -1 for bd in bdlist], val=-1)
        set_bound(ub, [bd == -1 for bd in bdlist], val=-1)
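    # Illustrative sketch (hypothetical 2x2 code array) of how the bound codes
    # above map to box constraints via set_bounds_by_code:
    #   lb = [np.full((2, 2), -np.inf)]; ub = [np.full((2, 2), np.inf)]
    #   set_bounds_by_code(lb, ub, [np.array([[2, 0], [1.5, -2]])])
    #   # lb[0] -> [[0, 0], [0, -inf]],  ub[0] -> [[inf, 0], [1, 0]]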

    if allow_s2:
        if fit_s02:
            s02_bounds = 2 * np.ones(
                (nQ, ))  # permitting noise as a free parameter
        else:
            s02_bounds = np.ones((nQ, ))
    else:
        s02_bounds = np.zeros((nQ, ))

    k_bounds = 1.5 * np.ones((nQ * (nS - 1), ))

    #k_bounds[1] = 0 # temporary: spatial kernel constrained to 0 for SST
    #k_bounds[2] = 0 # temporary: spatial kernel constrained to 0 for VIP

    kappa_bounds = np.ones((1, ))
    # kappa_bounds = 2*np.ones((1,))

    T_bounds = 1.5 * np.ones((nQ * (nT - 1), ))

    X_bounds = tile_nS_nT_nN(np.array([2, 1]))
    # X_bounds = np.array([np.array([2,1,2,1])]*nN)

    Xp_bounds = tile_nS_nT_nN(np.array([3, 1]))
    # Xp_bounds = np.array([np.array([3,1,3,1])]*nN)

    # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,)))
    # # Y_bounds = 2*np.ones((nN,nT*nS*nQ))

    Eta_bounds = tile_nS_nT_nN(3 * np.ones((nQ, )))
    # Eta_bounds = 3*np.ones((nN,nT*nS*nQ))

    if allow_s2:
        if allow_var:
            Xi_bounds = tile_nS_nT_nN(3 * np.ones((nQ, )))
        else:
            Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, )))
    else:
        Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, )))

    # Xi_bounds = 3*np.ones((nN,nT*nS*nQ))

    h1_bounds = -2 * np.ones((1, ))

    h2_bounds = 2 * np.ones((1, ))

    bl_bounds = 3 * np.ones((nQ, ))

    if free_amplitude:
        amp_bounds = 2 * np.ones((nT * nS * nQ, ))
    else:
        amp_bounds = 1 * np.ones((nT * nS * nQ, ))

    # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)]
    shapes1 = [(nP, nQ), (nQ, nQ), (nP, nQ),
               (nQ, nQ), (nQ, ), (nQ * (nS - 1), ), (1, ), (nQ * (nT - 1), ),
               (1, ), (1, ), (nQ, ), (nQ * nS * nT, )]
    shapes2 = [(nN, nT * nS * nP), (nN, nT * nS * nP), (nN, nT * nS * nQ),
               (nN, nT * nS * nQ)]
    #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
    #print('size of shapes2: '+str(np.sum([np.prod(shp) for shp in shapes2])))
    #         Wmx,    Wmy,    Wsx,    Wsy,    s02,  k,    kappa,T,   h1, h2
    #XX,            XXp,          Eta,          Xi

    #bdlist = [Wmx_bounds,Wmy_bounds,Wsx_bounds,Wsy_bounds,s02_bounds,k_bounds,kappa_bounds,T_bounds,X_bounds,Xp_bounds,Eta_bounds,Xi_bounds,h1_bounds,h2_bounds]
    bd1list = [
        Wmx_bounds, Wmy_bounds, Wsx_bounds, Wsy_bounds, s02_bounds, k_bounds,
        kappa_bounds, T_bounds, h1_bounds, h2_bounds, bl_bounds, amp_bounds
    ]
    bd2list = [X_bounds, Xp_bounds, Eta_bounds, Xi_bounds]

    lb1, ub1 = [[sgn * np.inf * np.ones(shp) for shp in shapes1]
                for sgn in [-1, 1]]
    set_bounds_by_code(lb1, ub1, bd1list)
    lb2, ub2 = [[sgn * np.inf * np.ones(shp) for shp in shapes2]
                for sgn in [-1, 1]]
    set_bounds_by_code(lb2, ub2, bd2list)

    #set_bound(lb,[bd==0 for bd in bdlist],val=0)
    #set_bound(ub,[bd==0 for bd in bdlist],val=0)
    #
    #set_bound(lb,[bd==2 for bd in bdlist],val=0)
    #
    #set_bound(ub,[bd==-2 for bd in bdlist],val=0)
    #
    #set_bound(lb,[bd==1 for bd in bdlist],val=1)
    #set_bound(ub,[bd==1 for bd in bdlist],val=1)
    #
    #set_bound(lb,[bd==1.5 for bd in bdlist],val=0)
    #set_bound(ub,[bd==1.5 for bd in bdlist],val=1)
    #
    #set_bound(lb,[bd==-1 for bd in bdlist],val=-1)
    #set_bound(ub,[bd==-1 for bd in bdlist],val=-1)

    # for bd in [lb,ub]:
    #     for ind in [2,3]:
    #         bd[ind][:,1] = 0

    # temporary for no variation expt.
    # lb[2] = np.zeros_like(lb[2])
    # lb[3] = np.zeros_like(lb[3])
    # lb[4] = np.ones_like(lb[4])
    # lb[5] = np.zeros_like(lb[5])
    # ub[2] = np.zeros_like(ub[2])
    # ub[3] = np.zeros_like(ub[3])
    # ub[4] = np.ones_like(ub[4])
    # ub[5] = np.ones_like(ub[5])
    # temporary for no variation expt.
    lb1 = np.concatenate([a.flatten() for a in lb1])
    ub1 = np.concatenate([b.flatten() for b in ub1])
    lb2 = np.concatenate([a.flatten() for a in lb2])
    ub2 = np.concatenate([b.flatten() for b in ub2])
    bounds1 = [(a, b) for a, b in zip(lb1, ub1)]
    bounds2 = [(a, b) for a, b in zip(lb2, ub2)]

    nS = 2
    #print('nT: '+str(nT))
    ndims = 5
    ncelltypes = 5
    Yhat = [[None for iT in range(nT)] for iS in range(nS)]
    Xhat = [[None for iT in range(nT)] for iS in range(nS)]
    Ypc_list = [[None for iT in range(nT)] for iS in range(nS)]
    Xpc_list = [[None for iT in range(nT)] for iS in range(nS)]
    mx = [None for iS in range(nS)]
    for iS in range(nS):
        mx[iS] = np.zeros((ncelltypes, ))
        yy = [None for icelltype in range(ncelltypes)]
        for icelltype in range(ncelltypes):
            yy[icelltype] = np.nanmean(Rso[icelltype][iS][0], 0)
            mx[iS][icelltype] = np.nanmax(yy[icelltype])
        for iT in range(nT):
            y = [
                np.nanmean(Rso[icelltype][iS][iT], axis=0)[:, np.newaxis] /
                mx[iS][icelltype] for icelltype in range(1, ncelltypes)
            ]
            Ypc_list[iS][iT] = [None for icelltype in range(1, ncelltypes)]
            for icelltype in range(1, ncelltypes):
                # as currently written, penalties involving (X,Y)pc_list are effectively artificially smaller by
                # a factor of mx[iS][icelltype] compared to what one would expect from the (X,Y)-penalty as defined
                # subsequently.
                rss = Rso[icelltype][iS][iT].copy(
                )  #/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1)
                #print('sum of isnan: '+str(np.isnan(rss).sum(1)))
                #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1)
                rss = rss[np.isnan(rss).sum(1) == 0]
                #         print(rss.max())
                #         rss[rss<0] = 0
                #         rss = rss[np.random.randn(rss.shape[0])>0]
                try:
                    u, s, v = np.linalg.svd(rss - np.mean(rss, 0)[np.newaxis])
                    Ypc_list[iS][iT][icelltype - 1] = [
                        (s[idim], v[idim]) for idim in range(ndims)
                    ]
    #                 print('yep on Y')
    #                 print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
                except:
                    print('nope on Y')
                    #print('shape of rss: '+str(rss.shape))
                    #print('mean of rss: '+str(np.mean(np.isnan(rss))))
                    #print('min of this rs: '+str(np.min(np.sum(rs[icelltype][iS][iT],axis=1))))
            Yhat[iS][iT] = np.concatenate(y, axis=1)
            #         x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis]
            icelltype = 0
            #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype]
            x = np.nanmean(Rso[icelltype][iS][iT],
                           0)[:, np.newaxis] / mx[iS][icelltype]
            #         opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis]
            Xhat[iS][iT] = np.concatenate((x, np.ones_like(x)), axis=1)
            #         Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1)
            icelltype = 0
            #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype]
            rss = Rso[icelltype][iS][iT].copy()
            rss = rss[np.isnan(rss).sum(1) == 0]
            #         try:
            u, s, v = np.linalg.svd(rss - rss.mean(0)[np.newaxis])
            Xpc_list[iS][iT] = [None for iinput in range(2)]
            Xpc_list[iS][iT][0] = [(s[idim], v[idim]) for idim in range(ndims)]
            Xpc_list[iS][iT][1] = [(0, np.zeros((Xhat[0][0].shape[0], )))
                                   for idim in range(ndims)]
    #         except:
    #             print('nope on X')
    #             print(np.mean(np.isnan(rss)))
    #             print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1)))
    nN, nP = Xhat[0][0].shape
    #print('nP: '+str(nP))
    nQ = Yhat[0][0].shape[1]

    import sim_utils

    pop_rate_fn = sim_utils.f_miller_troyer
    pop_deriv_fn = sim_utils.fprime_miller_troyer

    def compute_f_(Eta, Xi, s02):
        return sim_utils.f_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)]))

    def compute_fprime_m_(Eta, Xi, s02):
        return sim_utils.fprime_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02
                                         for ipixel in range(nS * nT)])) * Xi

    def compute_fprime_s_(Eta, Xi, s02):
        s2 = Xi**2 + np.concatenate((s02, s02), axis=0)
        return sim_utils.fprime_s_miller_troyer(Eta, s2) * (Xi / s2)

    def sorted_r_eigs(w):
        drW, prW = np.linalg.eig(w)
        srtinds = np.argsort(drW)
        return drW[srtinds], prW[:, srtinds]

    # shapes1: 0.Wmx, 1.Wmy, 2.Wsx, 3.Wsy, 4.s02, 5.K, 6.kappa, 7.T, 8.h1, 9.h2, 10.bl, 11.amp
    # shapes2: 0.XX, 1.XXp, 2.Eta, 3.Xi

    shapes1 = [(nP, nQ), (nQ, nQ), (nP, nQ),
               (nQ, nQ), (nQ, ), (nQ * (nS - 1), ), (1, ), (nQ * (nT - 1), ),
               (1, ), (1, ), (nQ, ), (nT * nS * nQ, )]
    shapes2 = [(nN, nT * nS * nP), (nN, nT * nS * nP), (nN, nT * nS * nQ),
               (nN, nT * nS * nQ)]
    #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
    #print('size of shapes2: '+str(np.sum([np.prod(shp) for shp in shapes2])))

    import calnet.fitting_spatial_feature

    YYhat = calnet.utils.flatten_nested_list_of_2d_arrays(Yhat)
    XXhat = calnet.utils.flatten_nested_list_of_2d_arrays(Xhat)

    opto_dict = np.load(opto_silencing_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto, (nN, 2, nS, 2, nQ)),
                           3).reshape((nN * 2, -1))
    Yhat_opto[0::12] = np.nanmean(Yhat_opto[0::12], axis=0)[np.newaxis]
    Yhat_opto[1::12] = np.nanmean(Yhat_opto[1::12], axis=0)[np.newaxis]
    Yhat_opto = Yhat_opto / np.nanmax(Yhat_opto[0::2], 0)[np.newaxis, :]
    #print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_halo = Yhat_opto.reshape((nN, 2, -1))
    opto_transform1 = calnet.utils.fit_opto_transform(
        YYhat_halo, norm01=norm_opto_transforms)

    opto_transform1.res[:, [0, 2, 3, 4, 6, 7]] = 0

    dYY1 = opto_transform1.transform(YYhat) - opto_transform1.preprocess(YYhat)

    #YYhat_halo_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_halo)
    #dYY1 = YYhat_halo_sim[:,1,:] - YYhat_halo_sim[:,0,:]

    def overwrite_plus_n(arr, to_overwrite, n):
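        # In-place: column `to_overwrite` of arr is replaced by column `to_overwrite + n`,
        # e.g. overwrite_plus_n(a, 1, 4) sets a[:, 1] = a[:, 5]. The same array is returned.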
        arr[:, to_overwrite] = arr[:, int(to_overwrite + n)]
        return arr

    for to_overwrite in [1, 2]:
        n = 4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]
    for to_overwrite in [7]:
        n = -4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]

    if ignore_halo_vip:
        dYY1[:, 2::nQ] = np.nan

    #for to_overwrite in [1,2]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+4]
    #for to_overwrite in [7]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite-4]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]
    #for to_overwrite in [1,2,5,6]: # overwrite sst and vip with off-centered values
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+8]
    #for to_overwrite in [11,15]:
    #    dYY1[:,to_overwrite] = np.nan #dYY1[:,to_overwrite-8]

    opto_dict = np.load(opto_activation_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto, (nN, 2, nS, 2, nQ)),
                           3).reshape((nN * 2, -1))
    Yhat_opto[0::12] = np.nanmean(Yhat_opto[0::12], axis=0)[np.newaxis]
    Yhat_opto[1::12] = np.nanmean(Yhat_opto[1::12], axis=0)[np.newaxis]
    Yhat_opto = Yhat_opto / Yhat_opto[0::2].max(0)[np.newaxis, :]
    #print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_chrimson = Yhat_opto.reshape((nN, 2, -1))
    opto_transform2 = calnet.utils.fit_opto_transform(
        YYhat_chrimson, norm01=norm_opto_transforms)
    dYY2 = opto_transform2.transform(YYhat) - opto_transform2.preprocess(YYhat)
    #YYhat_chrimson_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_chrimson)
    #dYY2 = YYhat_chrimson_sim[:,1,:] - YYhat_chrimson_sim[:,0,:]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    #print('dYY1 mean: %03f'%np.nanmean(np.abs(dYY1)))
    #print('dYY2 mean: %03f'%np.nanmean(np.abs(dYY2)))

    dYY = np.concatenate((dYY1, dYY2), axis=0)

    #titles = ['VIP silencing','VIP activation']
    #for itype in [0,1,2,3]:
    #    plt.figure(figsize=(5,2.5))
    #    for iyy,dyy in enumerate([dYY1,dYY2]):
    #        plt.subplot(1,2,iyy+1)
    #        if np.sum(np.isnan(dyy[:,itype]))==0:
    #            sca.scatter_size_contrast(YYhat[:,itype],YYhat[:,itype]+dyy[:,itype],nsize=6,ncontrast=6)#,mn=0)
    #        plt.title(titles[iyy])
    #        plt.xlabel('cell type %d event rate, \n light off'%itype)
    #        plt.ylabel('cell type %d event rate, \n light on'%itype)
    #        ut.erase_top_right()
    #    plt.tight_layout()
    #    ut.mkdir('figures')
    #    plt.savefig('figures/scatter_light_on_light_off_target_celltype_%d.eps'%itype)

    opto_mask = ~np.isnan(dYY)

    #dYY[nN:][~opto_mask[nN:]] = -dYY[:nN][~opto_mask[nN:]]

    #print('mean of opto_mask: '+str(opto_mask.mean()))

    #dYY[~opto_mask] = 0
    def zero_nans(arr):
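        # In-place: NaN entries of arr are set to 0 and the same array is returned.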
        arr[np.isnan(arr)] = 0
        return arr

    #dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\
    #        opto_transform2.slope,opto_transform2.intercept,opto_transform2.res\
    #        = [zero_nans(x) for x in \
    #                [dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\
    #                opto_transform2.slope,opto_transform2.intercept,opto_transform2.res]]
    dYY = zero_nans(dYY)

    to_adjust = np.logical_or(np.isnan(opto_transform2.slope[0]),
                              np.isnan(opto_transform2.intercept[0]))
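
    # Where the chrimson (activation) transform could not be fit (NaN slope/intercept),
    # the assignments below appear to fall back to the algebraic inverse of the halo
    # (silencing) transform: if y_on = a*y + b + res, then y = y_on/a - b/a - res/a.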

    opto_transform2.slope[:, to_adjust] = 1 / opto_transform1.slope[:, to_adjust]
    opto_transform2.intercept[:, to_adjust] = -opto_transform1.intercept[:, to_adjust] / opto_transform1.slope[:, to_adjust]
    opto_transform2.res[:, to_adjust] = -opto_transform1.res[:, to_adjust] / opto_transform1.slope[:, to_adjust]

    #np.save('/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy',dYY)

    from importlib import reload
    reload(calnet)
    #reload(calnet.fitting_2step_spatial_feature_opto_tight_nonlinear)
    reload(sim_utils)
    # reload(calnet.fitting_spatial_feature)
    # W0list = [np.ones(shp) for shp in shapes]
    wt_dict = {}
    wt_dict['X'] = 3  #1
    wt_dict['Y'] = 3
    #wt_dict['Eta'] = 3 # 1 #
    wt_dict['Xi'] = 0.1
    wt_dict['stims'] = np.ones((nN, 1))  #(np.arange(30)/30)[:,np.newaxis]**1 #
    wt_dict['barrier'] = 0.  #30.0 #0.1
    wt_dict['opto'] = 1  #1e1
    wt_dict['isn'] = 0.3
    wt_dict['tv'] = 1
    spont_frac = 0.5
    pc_frac = 0.5
    wt_dict['stimsOpto'] = (1 - spont_frac) * 6 / 5 * np.ones((nN, 1))
    wt_dict['stimsOpto'][0::6] = spont_frac * 6
    wt_dict['celltypesOpto'] = (1 - pc_frac) * 4 / 3 * np.ones(
        (1, nQ * nS * nT))
    wt_dict['celltypesOpto'][0, 0::nQ] = pc_frac * 4
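    # stimsOpto/celltypesOpto presumably reweight the opto loss across stimuli and cell
    # types: every 6th stimulus row gets weight spont_frac*6 vs. (1-spont_frac)*6/5 for
    # the rest, and every nQ-th column gets weight pc_frac*4 vs. (1-pc_frac)*4/3 (the
    # 4/3 factor assumes nQ == 4), so each group carries a fixed fraction of the total.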
    wt_dict['dirOpto'] = np.array((1, 0.3))
    wt_dict['dYY'] = 10  #10
    wt_dict['coupling'] = 1e-3
    wt_dict['smi'] = 0.1
    wt_dict['smi_halo'] = 30
    wt_dict['smi_chrimson'] = 0.1

    ##temporary no_opto
    wt_dict['opto'] = 0
    wt_dict['dirOpto'] = np.array((1, 1))
    #wt_dict['stimsOpto'] = np.ones((nN,1))
    wt_dict['celltypesOpto'] = np.ones((1, nQ * nS * nT))
    wt_dict['smi'] = 0  #0.01 # 0
    wt_dict['smi_halo'] = 0  #1 # 0
    wt_dict['smi_chrimson'] = 0  #0.01 # 0
    wt_dict['isn'] = 0.1
    wt_dict['tv'] = 0.1
    wt_dict['X'] = 3
    wt_dict['Eta'] = 10  #3 # 1 #

    ## temporary opto from no_opto
    #wt_dict['opto'] = 0.01
    #wt_dict['tv'] = 0.3#0.1

    np.save(
        'XXYYhat.npy', {
            'YYhat': YYhat,
            'XXhat': XXhat,
            'rs': rs,
            'Rs': Rs,
            'Rso': Rso,
            'Ypc_list': Ypc_list,
            'Xpc_list': Xpc_list
        })
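    # Initialize the net inputs Eta0 by (presumably) inverting the Miller-Troyer rate
    # nonlinearity at the measured rates YYhat, with or without the s02 variance term
    # depending on allow_s2.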
    if allow_s2:
        Eta0 = invert_f_mt(YYhat)
    else:
        Eta0 = invert_f_mt(YYhat, s02=0)

    # W1 parameters: Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp
    # W2 parameters: XX, XXp, Eta, Xi

    opt = fmc.gen_opt(nS=nS, nT=nT)
    opt['allow_s02'] = False
    opt['allow_A'] = False
    opt['allow_B'] = True

    ntries = 1
    nhyper = 1
    dt = 1e-1
    niter = int(np.round(10 / dt))  #int(1e4)
    perturbation_size = 5e-2
    # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5
    #l2_penalty = 0.1
    W1t = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    W2t = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    loss = np.zeros((nhyper, ntries))
    is_neg = np.array([b[1] for b in bounds1]) == 0
    counter = 0
    negatize = [np.zeros(shp, dtype='bool') for shp in shapes1]
    #print(shapes1)
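    # negatize marks, per parameter block in shapes1, the entries whose upper bound in
    # bounds1 is 0 (i.e. constrained non-positive); the loop below flips the sign of the
    # corresponding randomly initialized weights so they start inside their bounds.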
    for ishp, shp in enumerate(shapes1):
        nel = np.prod(shp)
        negatize[ishp][:][is_neg[counter:counter + nel].reshape(shp)] = True
        counter = counter + nel
    for ihyper in range(nhyper):
        for itry in range(ntries):
            #print((ihyper,itry))
            #[0.(nP,nQ),1.(nQ,nQ),2.(nP,nQ),3.(nQ,nQ),4.(nQ,),5.(nQ*(nS-1),),6.(1,),7.(nQ*(nT-1),),8.(1,),9.(1,),10.(nQ,),11.(nQ*nS*nT,)]
            W10list = [
                init_noise * (ihyper + 1) * np.random.rand(*shp)
                for shp in shapes1
            ]
            W20list = [
                init_noise * (ihyper + 1) * np.random.rand(*shp)
                for shp in shapes2
            ]
            #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
            #print('size of w10: '+str(np.sum([np.size(x) for x in W10list])))
            #print('len(W10list) : '+str(len(W10list)))
            counter = 0
            for ishp, shp in enumerate(shapes1):
                W10list[ishp][negatize[ishp]] = -W10list[ishp][negatize[ishp]]
            W10list[4] = np.ones(shapes1[4])  # s02
            W10list[5] = np.ones(shapes1[5])  # K
            W10list[6] = np.ones(shapes1[6])  # kappa
            W10list[7] = np.ones(shapes1[7])  # T
            W10list[8] = np.zeros(shapes1[8])  # h1
            W10list[9] = np.zeros(shapes1[9])  # h2
            W10list[10] = np.zeros(shapes1[10])  # baseline
            W10list[11] = np.ones(shapes1[11])  # amplitude
            W20list[0] = np.concatenate(Xhat, axis=1)  #XX
            W20list[1] = np.zeros_like(W20list[1])  #XXp
            W20list[2] = Eta0.copy()  #np.zeros(shapes[10]) #Eta
            W20list[3] = np.zeros(shapes2[3])  #Xi
            #[Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi]
            if init_W_from_lsq:
                W10list[0], W10list[1] = initialize_W(Xhat,
                                                      Yhat,
                                                      scale_by=scale_init_by,
                                                      allow_s2=allow_s2)
                for ivar in range(0, 2):
                    W10list[ivar] = W10list[ivar] + init_noise * np.random.randn(*W10list[ivar].shape)
            if init_W_from_lbfgs:
                print(opt)
                opt_param, result, _, _, _, _, _, _, _, _, _, _, _ = fmc.initialize_params(
                    XXhat, YYhat, opt, wpcpc=5, wpvpv=-6)
                these_shapes = [(nP, nQ), (nQ, nQ), (nQ, ), (nQ, ), (nQ, ),
                                (nQ, )]
                Wmx0, Wmy0, K0, s020, amplitude0, baseline0 = calnet.utils.parse_thing(
                    opt_param, these_shapes)
                if init_Eta_with_s02:
                    #assert(True==False)
                    Eta0 = invert_f_mt_with_s02(YYhat -
                                                np.tile(baseline0, nS * nT),
                                                s020,
                                                nS=nS,
                                                nT=nT)
                    W20list[2] = Eta0.copy()
                #Wmx0 = opt_param[:nP]
                #Wmy0 = opt_param[nP:nP+nQ]
                #K0 = opt_param[nP+nQ]
                #s020 = opt_param[nP+nQ+1]
                #amplitude0 = opt_param[nP+nQ+2]
                #baseline0 = opt_param[nP+nQ+3]
                print((Wmx0, Wmy0, K0, s020, np.tile(amplitude0,
                                                     2), baseline0))
                W10list[0], W10list[1], W10list[5], W10list[4], W10list[-1], W10list[-2] = \
                    Wmx0, Wmy0, K0, s020, np.tile(amplitude0, 2), baseline0
                for ivar in range(0, 2):
                    W10list[ivar] = W10list[ivar] + init_noise * np.random.randn(*W10list[ivar].shape)
            elif constrain_isn:
                W10list[1][0, 0] = 3
                if help_constrain_isn:
                    W10list[1][0, 3] = 5
                    W10list[1][3, 0] = -5
                    W10list[1][3, 3] = -5
                else:
                    W10list[1][0, 1:4] = 5
                    W10list[1][1:4, 0] = -5

            if init_W_from_file:
                npyfile = np.load(init_file, allow_pickle=True)[()]

                #Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,h1,h2,bl,amp = parse_W1(W1)
                #XX,XXp,Eta,Xi = parse_W2(W2)
                #Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,bl,amp = parse_W1(W1)
                W10list = [
                    npyfile['as_list'][ivar]
                    for ivar in [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15]
                ]
                W20list = [npyfile['as_list'][ivar] for ivar in [8, 9, 10, 11]]
                if W20list[0].size == nN * nS * 2 * nP:
                    #assert(True==False)
                    W10list[7] = np.array(())
                    W10list[1][1, 0] = W10list[1][1, 0]  # no-op; presumably left over from an earlier manual tweak
                    W20list[0] = np.nanmean(
                        W20list[0].reshape((nN, nS, 2, nP)), 2).flatten()  #XX
                    W20list[1] = np.nanmean(
                        W20list[1].reshape((nN, nS, 2, nP)), 2).flatten()  #XXp
                    W20list[2] = np.nanmean(
                        W20list[2].reshape((nN, nS, 2, nQ)), 2).flatten()  #Eta
                    W20list[3] = np.nanmean(
                        W20list[3].reshape((nN, nS, 2, nQ)), 2).flatten()  #Xi
                if correct_Eta:
                    #assert(True==False)
                    W20list[2] = Eta0.copy()
                if len(W10list) < len(shapes1):
                    #assert(True==False)
                    W10list = W10list + [
                        np.array(1),
                        np.zeros((nQ, )),
                        np.zeros((nT * nS * nQ, ))
                    ]  # add h2, bl, amp
                if init_Eta_with_s02:
                    #assert(True==False)
                    s02 = W10list[4].copy()
                    Eta0 = invert_f_mt_with_s02(YYhat, s02, nS=nS, nT=nT)
                    W20list[2] = Eta0.copy()
                #if init_Eta12_with_dYY:
                #    Eta0 = W20list[2].copy().reshape((nN,nQ*nS*nT))
                #    Xi0 = W20list[3].copy().reshape((nN,nQ*nS*nT))
                #    s020 = W10list[4].copy()
                #    YY0s = compute_f_(Eta0,Xi0,s020)
                #titles = ['VIP silencing','VIP activation']
                #for itype in [0,1,2,3]:
                #    plt.figure(figsize=(5,2.5))
                #    for iyy,yy in enumerate([YY10s,YY20s]):
                #        plt.subplot(1,2,iyy+1)
                #        if np.sum(np.isnan(yy[:,itype]))==0:
                #            sca.scatter_size_contrast(YY0s[:,itype],yy[:,itype],nsize=6,ncontrast=6)#,mn=0)
                #        plt.title(titles[iyy])
                #        plt.xlabel('cell type %d event rate, \n light off'%itype)
                #        plt.ylabel('cell type %d event rate, \n light on'%itype)
                #        ut.erase_top_right()
                #    plt.tight_layout()
                #    ut.mkdir('figures')
                #    plt.savefig('figures/scatter_light_on_light_off_init_celltype_%d.eps'%itype)
                for ivar in [0, 1, 4, 5]:  # Wmx, Wmy, s02, k
                    print(init_noise)
                    W10list[ivar] = W10list[ivar] + init_noise * np.random.randn(*W10list[ivar].shape)

            #print('size of bounds1: '+str(np.sum([np.size(x) for x in bd1list])))
            #print('size of w10: '+str(np.sum([np.size(x) for x in W10list])))
            #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
            W1t[ihyper][itry], W2t[ihyper][itry], loss[ihyper][
                itry], gr, hess, result = calnet.fitting_2step_spatial_feature_opto_tight_nonlinear_baseline.fit_W_sim(
                    Xhat,
                    Xpc_list,
                    Yhat,
                    Ypc_list,
                    pop_rate_fn=pop_rate_fn,
                    pop_deriv_fn=pop_deriv_fn,
                    W10list=W10list.copy(),
                    W20list=W20list.copy(),
                    bounds1=bounds1,
                    bounds2=bounds2,
                    niter=niter,
                    wt_dict=wt_dict,
                    l2_penalty=l2_penalty,
                    compute_hessian=False,
                    dt=dt,
                    perturbation_size=perturbation_size,
                    dYY=dYY,
                    constrain_isn=constrain_isn,
                    tv=tv,
                    opto_mask=opto_mask,
                    use_opto_transforms=use_opto_transforms,
                    opto_transform1=opto_transform1,
                    opto_transform2=opto_transform2,
                    share_residuals=share_residuals,
                    stimwise=stimwise,
                    simulate1=simulate1,
                    simulate2=simulate2,
                    verbose=verbose)

    #def parse_W(W):
    #    Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = W
    #    return Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2
    def parse_W1(W):
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp = W
        return Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp

    def parse_W2(W):
        XX, XXp, Eta, Xi = W
        return XX, XXp, Eta, Xi

    itry = 0
    Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp = parse_W1(W1t[0][0])
    XX, XXp, Eta, Xi = parse_W2(W2t[0][0])

    labels1 = [
        'Wmx', 'Wmy', 'Wsx', 'Wsy', 's02', 'K', 'kappa', 'T', 'h1', 'h2', 'bl',
        'amp'
    ]
    labels2 = ['XX', 'XXp', 'Eta', 'Xi']
    Wstar_dict = {}
    for i, label in enumerate(labels1):
        Wstar_dict[label] = W1t[0][0][i]
    for i, label in enumerate(labels2):
        Wstar_dict[label] = W2t[0][0][i]
    Wstar_dict['as_list'] = [
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, bl, amp
    ]
    Wstar_dict['loss'] = loss[0][0]
    Wstar_dict['wt_dict'] = wt_dict
    np.save(weights_file, Wstar_dict, allow_pickle=True)
Example #22
0
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,\
          nan_mask, target_nb_pseudo_obs = 500, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
          dm = [], max_patience = 1): # dm: Hack to remove
    ''' Complete the missing values using a trained M1DGMM
    
    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    nan_mask (ndarray): A mask array equal to True where the observation value is missing, False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps, the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy-generated data for the moment)
    perform_selec (Bool): Whether or not to perform architecture selection
    dm (np array): The distance matrix of the observations. If not given, M1DGMM computes it
    max_patience (int): The patience parameter passed on to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    # !!! Hack
    cols = y.columns
    # Formatting
    if not isinstance(nan_mask, np.ndarray): nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray): y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]
    completed_y = deepcopy(y)

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,\
             eps, maxstep, seed, perform_selec = perform_selec,\
                 dm = dm, max_patience = max_patience, use_silhouette = True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns = cols), out['Ez.y'], assoc_thr = 0.0, \
                           title = 'Contribution of the variables to the latent dimensions',\
                           storage_path = None)

    # Unpacking the model from the M1DGMM output
    #p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli',
                                             'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:,var_distrib == 'continuous'].astype(float).std(axis = 0,\
                                                                    keepdims = True)
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights for the optimisation
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = (assoc / assoc.sum(1, keepdims=True))
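    # weights[j] is a row of absolute cosine similarities between variable j's latent
    # contribution and every other variable's, normalized to sum to 1; it is used below
    # to weight how much each observed variable counts when matching an incomplete row
    # against candidate reconstructions.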

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk,:,0], var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                 lambda_ord, nj_ord, lambda_cont, y_std) for kk in range(k[0])]
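    # cluster_obs[kk] is the mixed-type pseudo-observation decoded from the centre of
    # cluster kk; it is compared against the observed entries of each incomplete row
    # below to pick a starting point for the gradient-based imputation.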

    # Use only the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],\
             'cont': ['continuous'], 'ord': ['ordinal']}

    # Gradient optimisation
    nan_indices = np.where(nan_mask.any(1))[0]
    imputed_y = np.zeros_like(y)
    numobs = y.shape[0]

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected Binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(lb_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices, 0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(ub_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices, 0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)
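    # The linear constraint lb <= A @ z <= ub keeps the latent point z in a region where
    # the decoded binomial and continuous variables stay within the ranges observed in
    # the data (scipy's LinearConstraint; keep_feasible=True asks the solver to respect
    # it at intermediate iterates as well).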

    zz = []
    fun = []
    for i in range(numobs):
        if i in nan_indices:

            # Design the nan masks for the optimisation process
            nan_mask_i = nan_mask[i]
            weights_i = weights[nan_mask_i].mean(0)

            # Look for the best starting point
            cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],\
                            cat_features[~nan_mask_i], weights_i)\
                            for obs in cluster_obs]
            z02 = mu[np.argmin(cluster_dist), :, 0]

            # Formatting
            vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0] \
                             for type_alias, vartype in types.items()}
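            # vars_i maps each type alias ('bin', 'categ', 'cont', 'ord') to the indices,
            # within that type, of the variables that are observed for this row; these
            # indices slice the matching lambda coefficient blocks passed to the optimiser.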

            complete_categ = [
                l for idx, l in enumerate(lambda_categ)
                if idx in vars_i['categ']
            ]
            complete_ord = [
                l for idx, l in enumerate(lambda_ord) if idx in vars_i['ord']
            ]

            opt = minimize(stat_all, z02, \
                   args = (y[i, ~nan_mask_i], var_distrib[~nan_mask_i],\
                   weights_i[~nan_mask_i],\
                   lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],\
                   complete_categ,\
                   nj_categ[vars_i['categ']],\
                   complete_ord,\
                   nj_ord[vars_i['ord']],\
                   lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                   tol = eps, method='trust-constr', jac = grad_stat,\
                   constraints = lc,
                   options = {'maxiter': 1000})

            z = opt.x
            zz.append(z)
            fun.append(opt.fun)

            imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                         lambda_ord, nj_ord, lambda_cont, y_std)

        else:
            imputed_y[i] = y[i]

    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return out