Example #1
0
def compute_rho(eta, H, psi, mu_s, sigma_s, z_c, chsi):
    ''' Compute the rho terms of equation (8) of the DGMM paper, layer by layer
    eta (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu 
                        parameters for each layer (not read by this function)
    H (list of nb_layers elements of shape (K_l x r_{l-1}, r_l)): Lambda 
                                                    parameters for each layer
    psi (list of nb_layers elements of shape (K_l x r_{l-1}, r_{l-1})): Psi 
                                                    parameters for each layer
    mu_s (list of nd-arrays): read at index l + 1 for each layer l; presumably
                        the means of the Gaussians starting at each layer
    sigma_s (list of nd-arrays): read at index l + 1 for each layer l; 
                        presumably the covariance matrices of the Gaussians 
                        starting at each layer
    z_c (list of nd-arrays): z^{(l)} - eta^{(l)} for each layer. 
    chsi (list of nd-arrays): The chsi parameters for each layer
    -----------------------------------------------------------------------
    returns (list of ndarrays): The rho parameters for all paths starting 
                                    at each layer (one array per layer)
    '''

    nb_layers = len(H)
    nb_comps = [len(h) for h in H]
    # The trailing 1 keeps the product of the deeper component counts defined 
    # for the last layer
    nb_comps_aug = nb_comps + [1]

    rho = [0] * nb_layers

    for l in range(nb_layers):
        # Lambda^T Psi^{-1}, duplicated once per combination of deeper components
        nb_deeper_paths = np.prod(nb_comps_aug[l + 1:])
        Ht_psi_inv = np.repeat(t(H[l], (0, 2, 1)) @ pinv(psi[l]),
                               nb_deeper_paths, axis=0)

        # Moments of the next layer, replicated for each component of layer l
        next_sigma = np.tile(sigma_s[l + 1], (nb_comps[l], 1, 1))
        next_mu = np.tile(mu_s[l + 1], (nb_comps[l], 1, 1))

        # A leading axis is added to the parameters so that they broadcast 
        # against the leading axis of z_c[l]
        data_term = Ht_psi_inv[n_axis] @ z_c[l][..., n_axis]
        next_layer_term = (pinv(next_sigma) @ next_mu)[n_axis]

        rho[l] = chsi[l][n_axis] @ (data_term + next_layer_term)

    return rho
Example #2
0
def identifiable_estim_DGMM(eta_old, H_old, psi_old, Ez, AT):
    ''' Rescale the DGMM estimators to enforce the identifiability conditions
    eta_old (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu  
                        estimators of the previous iteration for each layer
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda 
                        estimators of the previous iteration for each layer
    psi_old (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi 
                        estimators of the previous iteration for each layer
    Ez (list of ndarrays): E(z^{(l)}) for all l, subtracted from eta_old
    AT (list of ndarrays): Var(z^{(l)})^{-1/2 T} for all l; its pseudo-inverse
                        is applied to every estimator of the layer
    -------------------------------------------------------------------------
    returns (tuple of length 3): "identifiable" estimators of eta, Lambda and 
                                Psi (1st condition), each as a list with one 
                                entry per layer
    '''

    layers = range(len(eta_old))

    # One pseudo-inverse per layer, shared by the three estimators
    AT_inv = [pinv(AT[l]) for l in layers]

    psi_new = [AT_inv[l] @ psi_old[l] @ t(AT_inv[l], (0, 2, 1))
               for l in layers]
    H_new = [AT_inv[l] @ H_old[l] for l in layers]
    eta_new = [AT_inv[l] @ (eta_old[l] - Ez[l]) for l in layers]

    return eta_new, H_new, psi_new
Example #3
0
def identifiable_estim_DDGMM(eta_old, H_old, psi_old, Ez, AT):
    ''' Rescale the estimators so that the latent variables are centered 
    reduced (1st DGMM identifiability condition)
    
    eta_old (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu  
                        estimators of the previous iteration for each layer
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda 
                        estimators of the previous iteration for each layer
    psi_old (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi 
                        estimators of the previous iteration for each layer
    Ez (list of (k_l, r_l) ndarray): E(z^{(l)}), subtracted from eta_old
    AT (list of (k_l, k_l) ndarray): Var(z^{(l)})^{-1/2 T}; its pseudo-inverse
                        is applied to every estimator of the layer
    -------------------------------------------------------------------------
    returns (tuple of length 3): "DDGMM identifiable" estimators of eta, Lambda and Psi
    '''

    eta_new, H_new, psi_new = [], [], []

    for l, eta_l in enumerate(eta_old):
        AT_inv = pinv(AT[l])
        AT_inv_T = t(AT_inv, (0, 2, 1))

        # Identifiability: center eta, then rescale the three estimators
        eta_new.append(AT_inv @ (eta_l - Ez[l]))
        H_new.append(AT_inv @ H_old[l])
        psi_new.append(AT_inv @ psi_old[l] @ AT_inv_T)

    return eta_new, H_new, psi_new
Example #4
0
def compute_chsi(H, psi, mu_s, sigma_s):
    ''' Compute the chsi terms of equation (8) of the DGMM paper 
    H (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda 
                                                    parameters for each layer
    psi (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi 
                                                    parameters for each layer
    mu_s (list of nd-arrays): The means of the Gaussians starting at each layer
                                            (not read by this function)
    sigma_s (list of nd-arrays): The covariance matrices of the Gaussians 
                                                    starting at each layer
    ------------------------------------------------------------------------------------------------
    returns (list of ndarray): The chsi parameters for all paths starting at each layer
    '''
    nb_layers = len(H)
    nb_comps = [len(h) for h in H]

    def lambda_t_psi_inv_lambda(l):
        # Lambda^T Psi^{-1} Lambda for every component of layer l
        return t(H[l], (0, 2, 1)) @ pinv(psi[l]) @ H[l]

    chsi = [0] * nb_layers

    # The last layer only involves the last element of sigma_s: no 
    # replication over deeper components is needed
    chsi[-1] = pinv(pinv(sigma_s[-1]) + lambda_t_psi_inv_lambda(-1))

    # Remaining layers, from the first one to the one before last
    for l in range(nb_layers - 1):
        # One copy of the component term per combination of deeper components
        comp_term = np.repeat(lambda_t_psi_inv_lambda(l),
                              np.prod(nb_comps[l + 1:]), axis=0)

        # Covariances of the next layer replicated for each component of layer l
        next_sigma = np.tile(sigma_s[l + 1], (nb_comps[l], 1, 1))

        chsi[l] = pinv(pinv(next_sigma) + comp_term)

    return chsi
Example #5
0
def diagonal_cond(H_old, psi_old):
    ''' Rotate Lambda so that Lambda^T Psi^{-1} Lambda is diagonal
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): The previous
                                        iteration values of Lambda estimators
    psi_old (list of ndarrays): The previous iteration values of Psi estimators
                    (list of nb_layers elements of shape (K_l x r_l-1, r_l-1))
    ------------------------------------------------------------------------
    returns (list of ndarrays): An "identifiable" H estimator (2nd condition),
                                one array per layer
    '''

    def rotate_layer(H_l, psi_l):
        # Right-multiplying Lambda by the eigenvectors of Lambda^T Psi^{-1} Lambda
        # turns that product into the diagonal matrix of its eigenvalues
        gram = np.transpose(H_l, (0, 2, 1)) @ pinv(psi_l) @ H_l
        _, eigvecs = eigh(gram)
        return H_l @ eigvecs

    return [rotate_layer(H_old[l], psi_old[l]) for l in range(len(H_old))]
Example #6
0
def M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H_old, k):
    ''' 
    Compute the estimators of eta, Lambda and Psi for all components and all layers
    Ez_ys (list of ndarrays): E(z^{(l)} | y, s) for all (l,s); the third axis 
                              of each array gives the dimension of the layer
    E_z1z2T_ys (list of ndarrays):  E(z^{(l)}z^{(l+1)T} | y, s) 
    E_z2z2T_ys (list of ndarrays):  E(z^{(l+1)}z^{(l+1)T} | y, s) 
    EeeT_ys (list of ndarrays): presumably E(ee^T | y, s) 
            with e = z^{(l)} - eta{k_l}^{(l)} - Lambda @ z^{(l + 1)}
    ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega
    H_old (list of ndarrays): The previous iteration values of Lambda estimators
    k (list of int): The number of component on each layer
    --------------------------------------------------------------------------
    returns (tuple of 3 lists of ndarrays): The new estimators of eta, Lambda 
                                and Psi for all components and all layers
    '''
    floor = 1E-14  # Smallest denominator allowed before dividing

    nb_layers = len(E_z1z2T_ys)
    dims = [Ez_ys[l].shape[2] for l in range(nb_layers + 1)]
    numobs = len(Ez_ys[0])

    def per_component(arr, summed_axes, *trailing_dims):
        # Unfold the path axis into one axis per layer, then sum the 
        # requested layer axes
        grid = arr.reshape(numobs, *k, *trailing_dims, order='C')
        return grid.sum(summed_axes)

    eta, H, psi = [], [], []

    for l in range(nb_layers):
        r_cur, r_next = dims[l], dims[l + 1]

        # Sum all the paths going through the layer: every layer axis 
        # except the one of the current layer (axis 0 holds the observations)
        other_layers = tuple(ax for ax in range(1, nb_layers + 1)
                             if ax != l + 1)

        z1 = per_component(Ez_ys[l], other_layers, r_cur)
        z2 = per_component(Ez_ys[l + 1], other_layers, r_next)
        z1z2T = per_component(E_z1z2T_ys[l], other_layers, r_cur, r_next)
        z2z2T = per_component(E_z2z2T_ys[l], other_layers, r_next, r_next)
        eeT = per_component(EeeT_ys[l], other_layers, r_cur, r_cur)

        # Two trailing axes so that the weights broadcast against matrices
        weights = per_component(ps_y, other_layers)[..., n_axis, n_axis]

        # Common denominator of the three estimators
        den = weights.sum(0)
        den = np.where(den < floor, floor, den)

        # eta estimator
        centered = z1[..., n_axis] - H_old[l][n_axis] @ z2[..., n_axis]
        eta_l = (weights * centered).sum(0) / den
        eta.append(eta_l)

        # Lambda estimator (relies on the freshly computed eta)
        cross = z1z2T - eta_l[n_axis] @ np.expand_dims(z2, 2)
        weighted_cross = weights * cross
        H_l = (weighted_cross @ pinv(z2z2T)).sum(0) / den
        H.append(H_l)

        # Psi estimator
        psi.append((weights * eeT).sum(0) / den)

    return eta, H, psi
Example #7
0
def M_step_DGMM_t(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys,
                  ps_y_c, ps_y_d, pst_yCyD, H_old, S_1L, k_1L, L_1L, L, rh):
    ''' 
    Compute the estimators of eta, Lambda and Psi for all components and all layers of the tail
    Ez_ys (list of ndarrays): E(z^{(l)} | y, s) for all (l,s)
    E_z1z2T_ys (list of ndarrays):  E(z^{(l)}z^{(l+1)T} | y, s) 
    E_z2z2T_ys (list of ndarrays):  E(z^{(l+1)}z^{(l+1)T} | y, s) 
    EeeT_ys (list of ndarrays): presumably E(ee^T | y, s) 
            with e = z^{(l)} - eta{k_l}^{(l)} - Lambda @ z^{(l + 1)}
    ps_y_* ((numobs, S) nd-array): p(s | y) for all s in Omega^*
    pst_yCyD ((numobs, S) nd-array): p(s^t | y) for all s in the tail
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda 
                        estimators of the previous iteration for each layer
    S_1L (dict): The number of paths starting at each layer until the common tail
                 (keys 'c', 'd' and 't' are read, first element only)
    k_1L (dict): The number of component on each layer including the common tail
                 (only the 't' entry is read)
    L_1L (dict): The number of layers where the lists include the heads and the tail layers
                 (only the 't' entry is read)
    L (dict): The number of layers in the networks (unused, to delete in future versions)
    rh (list): The dimension of each layer of the head h
    --------------------------------------------------------------------------
    returns (tuple of length 4): The new estimators of eta, Lambda and Psi 
                                            for all components and all layers
                                            and the associated latent expectancy 
    raises RuntimeError: if the Lambda estimator cannot be computed (the 
                         original error is chained as the cause)
    '''

    Lh = len(E_z1z2T_ys)
    numobs = len(Ez_ys[0])

    eta = []
    H = []
    psi = []

    #==========================================================================
    # Broadcast path probabilities
    #==========================================================================

    psc_y = ps_y_c.reshape(numobs, S_1L['c'][0] // S_1L['t'][0],
                           S_1L['t'][0], order='C')
    psd_y = ps_y_d.reshape(numobs, S_1L['d'][0] // S_1L['t'][0],
                           S_1L['t'][0], order='C')

    # Marginalise the tail paths out of each head, then give each head its 
    # own axis: psc_y is (numobs, ., 1, 1) and psd_y is (numobs, 1, ., 1)
    psc_y = psc_y.sum(-1, keepdims=True)
    psc_y = np.expand_dims(psc_y, 2)

    psd_y = psd_y.sum(-1, keepdims=True)
    psd_y = np.expand_dims(psd_y, 1)

    # p(sC, sD, st) = p(sC | yC) p(sD | yD) p(st | yC, yD)
    # Add a normalization ?
    psCsDst_y = psc_y * psd_y * np.expand_dims(np.expand_dims(pst_yCyD, 1), 1)

    # Layer-independent quantities, computed once instead of once per layer
    w_vec = psCsDst_y[..., n_axis]            # weights for vector expectations
    w_mat = psCsDst_y[..., n_axis, n_axis]    # weights for matrix expectations
    pst_y = psCsDst_y.sum((1, 2))             # head paths summed out

    Ezst_y = []

    for l in range(Lh):
        #======================================================================
        # Compute the full expectations multiplying by p(sC, sD, st | yC, yD)
        #======================================================================

        Ez1_yst = (w_vec * Ez_ys[l]).sum((1, 2))
        # Returned to the caller in its flat (pre-reshape) layout
        Ezst_y.append(deepcopy(Ez1_yst))

        Ez2_yst = (w_vec * Ez_ys[l + 1]).sum((1, 2))
        E_z1z2T_yst = (w_mat * E_z1z2T_ys[l]).sum((1, 2))
        E_z2z2T_yst = (w_mat * E_z2z2T_ys[l]).sum((1, 2))
        EeeT_yst = (w_mat * EeeT_ys[l]).sum((1, 2))

        #======================================================================
        # Broadcast the expectations and sum all the paths going through 
        # the components of the layer
        #======================================================================

        grid = (numobs, *k_1L['t'])
        idx_to_sum = tuple(set(range(1, L_1L['t'] + 1)) - {l + 1})

        z1 = Ez1_yst.reshape(*grid, rh[l], order='C').sum(idx_to_sum)
        z2 = Ez2_yst.reshape(*grid, rh[l + 1], order='C').sum(idx_to_sum)
        z1z2T = E_z1z2T_yst.reshape(*grid, rh[l], rh[l + 1],
                                    order='C').sum(idx_to_sum)
        z2z2T = E_z2z2T_yst.reshape(*grid, rh[l + 1], rh[l + 1],
                                    order='C').sum(idx_to_sum)
        eeT = EeeT_yst.reshape(*grid, rh[l], rh[l],
                               order='C').sum(idx_to_sum)

        #======================================================================
        # Compute the estimators
        #======================================================================

        # One common denominator for all estimators
        den = pst_y.reshape(*grid, order='C').sum(idx_to_sum)
        den = den.sum(0)[..., n_axis, n_axis]
        den = np.where(den < 1E-14, 1E-14, den)

        # eta estimator
        eta_num = z1[..., n_axis] - H_old[l][n_axis] @ z2[..., n_axis]
        eta_new = eta_num.sum(0) / den

        eta.append(eta_new)

        # Lambda estimator
        H_num = z1z2T - eta_new[n_axis] @ np.expand_dims(z2, 2)

        try:
            H_new = (H_num @ pinv(z2z2T, rcond=1e-2)).sum(0) / den
        except Exception as err:
            # Only ordinary errors are translated; KeyboardInterrupt and 
            # SystemExit propagate untouched. The offending matrices are 
            # printed and the original error is kept as the cause.
            print(z2z2T)
            raise RuntimeError('Overflow ?') from err
        H.append(H_new)

        # Psi estimator
        psi_new = eeT.sum(0) / den
        psi.append(psi_new)

    return eta, H, psi, Ezst_y
Example #8
0
def M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H_old, k, L_1Lh,
                rh):
    ''' 
    Compute the estimators of eta, Lambda and Psi for all components and all layers of both heads
    Ez_ys (list of ndarrays): E(z^{(l)} | y, s) for all (l,s)
    E_z1z2T_ys (list of ndarrays):  E(z^{(l)}z^{(l+1)T} | y, s) 
    E_z2z2T_ys (list of ndarrays):  E(z^{(l+1)}z^{(l+1)T} | y, s) 
    EeeT_ys (list of ndarrays): presumably E(ee^T | y, s) 
            with e = z^{(l)} - eta{k_l}^{(l)} - Lambda @ z^{(l + 1)}
    ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega
    H_old (list of ndarrays): The previous iteration values of Lambda estimators
    k (sequence of int): The number of component on each layer (unpacked 
                         as the per-layer axes of the reshaped arrays)
    L_1Lh (int): The number layers starting from the head h until the end of 
                 the common tail; axes 1 to L_1Lh - 1 are the layer axes
    rh (list): The dimension of each layer of the head h
    --------------------------------------------------------------------------
    returns (tuple of 3 lists of ndarrays): The new estimators of eta, Lambda 
                                and Psi for all components and all layers
    '''
    min_den = 1E-16  # Denominators below this value are replaced by it

    nb_head_layers = len(E_z1z2T_ys)
    numobs = len(Ez_ys[0])

    def unfold(arr, *trailing_dims):
        # Split the path axis into one axis per layer
        return arr.reshape(numobs, *k, *trailing_dims, order='C')

    eta, H, psi = [], [], []

    for l in range(nb_head_layers):
        d_cur = rh[l]
        d_next = rh[l + 1]

        #===============================================
        # Sum all the paths going through the layer
        #===============================================

        # Every layer axis except the one of the current layer
        axes = tuple(ax for ax in range(1, L_1Lh) if ax != l + 1)

        m1 = unfold(Ez_ys[l], d_cur).sum(axes)
        m2 = unfold(Ez_ys[l + 1], d_next).sum(axes)
        m12 = unfold(E_z1z2T_ys[l], d_cur, d_next).sum(axes)
        m22 = unfold(E_z2z2T_ys[l], d_next, d_next).sum(axes)
        mee = unfold(EeeT_ys[l], d_cur, d_cur).sum(axes)

        # Component probabilities, with two trailing axes for broadcasting
        probs = unfold(ps_y).sum(axes)[..., n_axis, n_axis]

        # Common denominator
        den = probs.sum(0)
        den = np.where(den < min_den, min_den, den)

        #===============================================
        # eta estimator
        #===============================================

        eta_num = m1[..., n_axis] - H_old[l][n_axis] @ m2[..., n_axis]
        eta_l = (probs * eta_num).sum(0) / den
        eta.append(eta_l)

        #===============================================
        # Lambda estimator (uses the new eta)
        #===============================================

        H_num = m12 - eta_l[n_axis] @ np.expand_dims(m2, 2)
        H_weighted = probs * H_num
        H.append((H_weighted @ pinv(m22)).sum(0) / den)

        #===============================================
        # Psi estimator
        #===============================================

        psi.append((probs * mee).sum(0) / den)

    return eta, H, psi