def compute_rho(eta, H, psi, mu_s, sigma_s, z_c, chsi):
    ''' Compute rho as defined in equation (8) of the DGMM paper

    eta (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu
        parameters for each layer (not read by this function)
    H (list of nb_layers elements of shape (K_l x r_{l-1}, r_l)): Lambda
        parameters for each layer
    psi (list of nb_layers elements of shape (K_l x r_{l-1}, r_{l-1})): Psi
        parameters for each layer
    mu_s (list of nd-arrays): The means of the Gaussians starting at each
        layer; entry l + 1 is read when processing layer l
    sigma_s (list of nd-arrays): The covariance matrices of the Gaussians
        starting at each layer; entry l + 1 is read when processing layer l
    z_c (list of nd-arrays): z^{(l)} - eta^{(l)} for each layer
    chsi (list of nd-arrays): The chsi parameters for each layer
    -----------------------------------------------------------------------
    returns (list of ndarrays): The rho parameters for all paths starting
        at each layer
    '''
    nb_comps = [len(lambda_l) for lambda_l in H]
    # The trailing 1 makes the slice taken for the last layer [1] rather
    # than an empty list
    nb_comps_padded = nb_comps + [1]

    rho = []
    for layer, lambda_l in enumerate(H):
        # Repeat the parameters of the next layer for each component of
        # the current layer
        reps = (nb_comps[layer], 1, 1)
        next_sigma = np.tile(sigma_s[layer + 1], reps)
        next_mu = np.tile(mu_s[layer + 1], reps)

        # Lambda^T Psi^{-1}, duplicated for every path of the layers below
        nb_paths_below = np.prod(nb_comps_padded[layer + 1:])
        Ht_psi_inv = t(lambda_l, (0, 2, 1)) @ pinv(psi[layer])
        Ht_psi_inv = np.repeat(Ht_psi_inv, nb_paths_below, axis=0)

        # A leading axis is added so that the terms broadcast against the
        # first axis of z_c
        data_term = Ht_psi_inv[n_axis] @ z_c[layer][..., n_axis]
        prior_term = (pinv(next_sigma) @ next_mu)[n_axis]
        rho.append(chsi[layer][n_axis] @ (data_term + prior_term))

    return rho
def identifiable_estim_DGMM(eta_old, H_old, psi_old, Ez, AT):
    ''' Enforce identifiability conditions for DGMM estimators

    eta_old (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu
        estimators of the previous iteration for each layer
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda
        estimators of the previous iteration for each layer
    psi_old (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi
        estimators of the previous iteration for each layer
    Ez (list of ndarrays): E(z^{(l)}) for all l
    AT (list of ndarrays): Var(z^{(l)})^{-1/2 T} for all l
    -------------------------------------------------------------------------
    returns (tuple of length 3): "identifiable" estimators of eta, Lambda
        and Psi (1st condition)
    '''
    eta_new, H_new, psi_new = [], [], []

    # Each layer is rescaled independently of the others
    for layer in range(len(eta_old)):
        AT_pinv = pinv(AT[layer])

        # Center eta with E(z), then rescale every parameter with pinv(AT)
        eta_new.append(AT_pinv @ (eta_old[layer] - Ez[layer]))
        H_new.append(AT_pinv @ H_old[layer])
        psi_new.append(AT_pinv @ psi_old[layer] @ t(AT_pinv, (0, 2, 1)))

    return eta_new, H_new, psi_new
def identifiable_estim_DDGMM(eta_old, H_old, psi_old, Ez, AT):
    ''' Ensure that the latent variables are centered reduced
    (1st DGMM identifiability condition)

    eta_old (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu
        estimators of the previous iteration for each layer
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda
        estimators of the previous iteration for each layer
    psi_old (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi
        estimators of the previous iteration for each layer
    Ez (list of (k_l, r_l) ndarray): E(z^{(l)})
    AT (list of (k_l, k_l) ndarray): Var(z^{(l)})^{-1/2 T}
    -------------------------------------------------------------------------
    returns (tuple of length 3): "DDGMM identifiable" estimators of eta,
        Lambda and Psi
    '''
    layers = range(len(eta_old))

    # Pseudo-inverse of AT, computed once per layer and reused below
    AT_pinv = [pinv(AT[l]) for l in layers]

    # eta is centered with E(z) before being rescaled
    eta_new = [AT_pinv[l] @ (eta_old[l] - Ez[l]) for l in layers]
    H_new = [AT_pinv[l] @ H_old[l] for l in layers]
    psi_new = [AT_pinv[l] @ psi_old[l] @ t(AT_pinv[l], (0, 2, 1))
               for l in layers]

    return eta_new, H_new, psi_new
def compute_chsi(H, psi, mu_s, sigma_s):
    ''' Compute chsi as defined in equation (8) of the DGMM paper

    H (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda
        parameters for each layer
    psi (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi
        parameters for each layer
    mu_s (list of nd-arrays): The means of the Gaussians starting at each
        layer (not read by this function)
    sigma_s (list of nd-arrays): The covariance matrices of the Gaussians
        starting at each layer
    ------------------------------------------------------------------------
    returns (list of ndarray): The chsi parameters for all paths starting
        at each layer
    '''
    nb_layers = len(H)
    nb_comps = [len(lambda_l) for lambda_l in H]

    chsi = []

    #=====================================================================
    # All layers but the last one
    #=====================================================================
    for layer in range(nb_layers - 1):
        lambda_l = H[layer]

        # Lambda^T Psi^{-1} Lambda, duplicated for each path of the
        # layers below
        Ht_psi_H = t(lambda_l, (0, 2, 1)) @ pinv(psi[layer]) @ lambda_l
        Ht_psi_H = np.repeat(Ht_psi_H, np.prod(nb_comps[layer + 1:]), axis=0)

        # Covariances of the next layer, repeated for each component of
        # the current layer
        next_sigma = np.tile(sigma_s[layer + 1], (nb_comps[layer], 1, 1))

        chsi.append(pinv(pinv(next_sigma) + Ht_psi_H))

    #=====================================================================
    # Last layer: relies on the last element of sigma_s, without tiling
    #=====================================================================
    lambda_last = H[-1]
    Ht_psi_H_last = t(lambda_last, (0, 2, 1)) @ pinv(psi[-1]) @ lambda_last
    chsi.append(pinv(pinv(sigma_s[-1]) + Ht_psi_H_last))

    return chsi
def diagonal_cond(H_old, psi_old):
    ''' Ensure that Lambda^T Psi^{-1} Lambda is diagonal

    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): The
        previous iteration values of Lambda estimators
    psi_old (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): The
        previous iteration values of Psi estimators
    ------------------------------------------------------------------------
    returns (list of ndarrays): An "identifiable" H estimator (2nd condition)
    '''
    def _rotate(lambda_l, psi_l):
        # Right-multiply Lambda by the eigenvectors of Lambda^T Psi^{-1} Lambda
        B = np.transpose(lambda_l, (0, 2, 1)) @ pinv(psi_l) @ lambda_l
        return lambda_l @ eigh(B)[1]

    return [_rotate(H_old[layer], psi_old[layer])
            for layer in range(len(H_old))]
def M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H_old, k):
    ''' Compute the estimators of eta, Lambda and Psi for all components
    and all layers

    NOTE(review): another M_step_DGMM taking two extra arguments is defined
    further down in this file; if both live in the same module the later
    definition replaces this one -- confirm which one callers expect.

    Ez_ys (list of ndarrays): E(z^{(l)} | y, s) for all (l,s)
    E_z1z2T_ys (list of ndarrays): E(z^{(l)}z^{(l+1)T} | y, s)
    E_z2z2T_ys (list of ndarrays): E(z^{(l+1)}z^{(l+1)T} | y, s)
    EeeT_ys (list of ndarrays): E(e e^T | y, s) with
        e = z^{(l)} - eta{k_l}^{(l)} - Lambda @ z^{(l + 1)}
    ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega
    H_old (list of ndarrays): The previous iteration values of Lambda
        estimators
    k (list of int): The number of component on each layer
    --------------------------------------------------------------------------
    returns (tuple of 3 lists of ndarrays): The new estimators of eta,
        Lambda and Psi for all components and all layers
    '''
    floor = 1E-14
    nb_layers = len(E_z1z2T_ys)
    numobs = len(Ez_ys[0])
    # Latent dimension of each layer, read from the last axis of Ez_ys
    dims = [Ez_ys[layer].shape[2] for layer in range(nb_layers + 1)]

    def _sum_other_layers(arr, axes, *trailing):
        # One axis per layer after the observation axis, then sum the
        # axes of all layers but the current one
        return arr.reshape(numobs, *k, *trailing, order='C').sum(axes)

    eta, H, psi = [], [], []
    for layer in range(nb_layers):
        r_cur, r_next = dims[layer], dims[layer + 1]
        other_axes = tuple(ax for ax in range(1, nb_layers + 1)
                           if ax != layer + 1)

        Ez1 = _sum_other_layers(Ez_ys[layer], other_axes, r_cur)
        Ez2 = _sum_other_layers(Ez_ys[layer + 1], other_axes, r_next)
        Ez1z2T = _sum_other_layers(E_z1z2T_ys[layer], other_axes,
                                   r_cur, r_next)
        Ez2z2T = _sum_other_layers(E_z2z2T_ys[layer], other_axes,
                                   r_next, r_next)
        EeeT = _sum_other_layers(EeeT_ys[layer], other_axes, r_cur, r_cur)

        # Probability of each component of the layer for each observation
        weights = _sum_other_layers(ps_y, other_axes)[..., n_axis, n_axis]

        # Common denominator, floored to avoid dividing by zero
        den = weights.sum(0)
        den = np.where(den < floor, floor, den)

        # eta estimator
        centered = Ez1[..., n_axis] - H_old[layer][n_axis] @ Ez2[..., n_axis]
        eta_new = (weights * centered).sum(0) / den
        eta.append(eta_new)

        # Lambda estimator: the weights are applied before the product
        # with the pseudo-inverse
        cross = Ez1z2T - eta_new[n_axis] @ np.expand_dims(Ez2, 2)
        weighted_cross = weights * cross
        H.append((weighted_cross @ pinv(Ez2z2T)).sum(0) / den)

        # Psi estimator
        psi.append((weights * EeeT).sum(0) / den)

    return eta, H, psi
def M_step_DGMM_t(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys,
                  ps_y_c, ps_y_d, pst_yCyD, H_old, S_1L, k_1L, L_1L, L, rh):
    ''' Compute the estimators of eta, Lambda and Psi for all components
    and all layers of the tail

    Ez_ys (list of ndarrays): E(z^{(l)} | y, s) for all (l,s)
    E_z1z2T_ys (list of ndarrays): E(z^{(l)}z^{(l+1)T} | y, s)
    E_z2z2T_ys (list of ndarrays): E(z^{(l+1)}z^{(l+1)T} | y, s)
    EeeT_ys (list of ndarrays): E(e e^T | y, s) with
        e = z^{(l)} - eta{k_l}^{(l)} - Lambda @ z^{(l + 1)}
    ps_y_* ((numobs, S) nd-array): p(s | y) for all s in Omega^*
    pst_yCyD ((numobs, S) nd-array): p(s^t | y) for all s in the tail
    H_old (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda
        estimators of the previous iteration for each layer
    S_1L (dict): The number of paths starting at each layer until the
        common tail (keys 'c', 'd' and 't' are read)
    k_1L (dict): The number of component on each layer including the
        common tail (key 't' is read)
    L_1L (dict): The number of layers where the lists include the heads
        and the tail layers (key 't' is read)
    L (dict): The number of layers in the networks (not read by this
        function, to delete in future versions)
    rh (list): The dimension of each layer of the head h
    --------------------------------------------------------------------------
    returns (tuple of length 4): The new estimators of eta, Lambda and Psi
        for all components and all layers and the associated latent
        expectancy
    raises RuntimeError: if the Lambda estimator cannot be computed; the
        underlying exception is attached as the cause
    '''
    epsilon = 1E-14
    Lh = len(E_z1z2T_ys)
    numobs = len(Ez_ys[0])

    eta = []
    H = []
    psi = []

    #==========================================================================
    # Broadcast path probabilities
    #==========================================================================
    nb_tail_paths = S_1L['t'][0]
    psc_y = ps_y_c.reshape(numobs, S_1L['c'][0] // nb_tail_paths,
                           nb_tail_paths, order='C')
    psd_y = ps_y_d.reshape(numobs, S_1L['d'][0] // nb_tail_paths,
                           nb_tail_paths, order='C')

    # Sum over the tail paths, then give each head its own axis
    psc_y = np.expand_dims(psc_y.sum(-1, keepdims=True), 2)
    psd_y = np.expand_dims(psd_y.sum(-1, keepdims=True), 1)

    # p(sC, sD, st) = p(sC | yC) p(sD | yD) p(st | yC, yD)
    # Add a normalization ?
    psCsDst_y = psc_y * psd_y \
        * np.expand_dims(np.expand_dims(pst_yCyD, 1), 1)
    ps_vec = psCsDst_y[..., n_axis]
    ps_mat = psCsDst_y[..., n_axis, n_axis]

    Ezst_y = []
    for l in range(Lh):
        #======================================================================
        # Compute the full expectations multiplying by p(sC, sD, st | yC, yD)
        #======================================================================
        Ez1_yst = (ps_vec * Ez_ys[l]).sum((1, 2))
        Ezst_y.append(deepcopy(Ez1_yst))
        Ez2_yst = (ps_vec * Ez_ys[l + 1]).sum((1, 2))
        E_z1z2T_yst = (ps_mat * E_z1z2T_ys[l]).sum((1, 2))
        E_z2z2T_yst = (ps_mat * E_z2z2T_ys[l]).sum((1, 2))
        EeeT_yst = (ps_mat * EeeT_ys[l]).sum((1, 2))

        #======================================================================
        # Broadcast the expectations
        #======================================================================
        Ez1_yst = Ez1_yst.reshape(numobs, *k_1L['t'], rh[l], order='C')
        Ez2_yst = Ez2_yst.reshape(numobs, *k_1L['t'], rh[l + 1], order='C')
        E_z1z2T_yst = E_z1z2T_yst.reshape(numobs, *k_1L['t'], rh[l],
                                          rh[l + 1], order='C')
        E_z2z2T_yst = E_z2z2T_yst.reshape(numobs, *k_1L['t'], rh[l + 1],
                                          rh[l + 1], order='C')
        EeeT_yst = EeeT_yst.reshape(numobs, *k_1L['t'], rh[l], rh[l],
                                    order='C')

        # Sum all the paths going through the components of the layer
        idx_to_sum = tuple(set(range(1, L_1L['t'] + 1)) - set([l + 1]))

        #======================================================================
        # Compute the estimators
        #======================================================================
        # One common denominator for all estimators, floored to avoid
        # dividing by zero
        den = psCsDst_y.sum((1, 2))
        den = den.reshape(numobs, *k_1L['t'], order='C').sum(idx_to_sum)
        den = den.sum(0)[..., n_axis, n_axis]
        den = np.where(den < epsilon, epsilon, den)

        Ez2_l = Ez2_yst.sum(idx_to_sum)

        # eta estimator
        eta_num = Ez1_yst.sum(idx_to_sum)[..., n_axis] \
            - H_old[l][n_axis] @ Ez2_l[..., n_axis]
        eta_new = eta_num.sum(0) / den
        eta.append(eta_new)

        # Lambda estimator
        H_num = E_z1z2T_yst.sum(idx_to_sum) \
            - eta_new[n_axis] @ np.expand_dims(Ez2_l, 2)
        E_z2z2T_l = E_z2z2T_yst.sum(idx_to_sum)
        try:
            H_new = (H_num @ pinv(E_z2z2T_l, rcond=1e-2)).sum(0) / den
        except Exception as err:
            # Only genuine errors are converted: KeyboardInterrupt and
            # SystemExit propagate untouched, and the original exception
            # is kept as the cause of the RuntimeError
            print(E_z2z2T_l)
            raise RuntimeError('Overflow ?') from err
        H.append(H_new)

        # Psi estimator
        psi_new = EeeT_yst.sum(idx_to_sum).sum(0) / den
        psi.append(psi_new)

    return eta, H, psi, Ezst_y
def M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H_old, k, L_1Lh, rh):
    ''' Compute the estimators of eta, Lambda and Psi for all components
    and all layers of both heads

    Ez_ys (list of ndarrays): E(z^{(l)} | y, s) for all (l,s)
    E_z1z2T_ys (list of ndarrays): E(z^{(l)}z^{(l+1)T} | y, s)
    E_z2z2T_ys (list of ndarrays): E(z^{(l+1)}z^{(l+1)T} | y, s)
    EeeT_ys (list of ndarrays): E(e e^T | y, s) with
        e = z^{(l)} - eta{k_l}^{(l)} - Lambda @ z^{(l + 1)}
    ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega
    H_old (list of ndarrays): The previous iteration values of Lambda
        estimators
    k (sequence of int): The number of component on each layer; it is
        unpacked into the reshape calls, one axis per entry
    L_1Lh (int): The number layers starting from the head h until the end
        of the common tail; used as the exclusive upper bound of the axes
        that are summed.
        NOTE(review): the other M-steps of this file use a "+ 1" upper
        bound here -- confirm the convention expected by the caller.
    rh (list): The dimension of each layer of the head h
    --------------------------------------------------------------------------
    returns (tuple of 3 lists of ndarrays): The new estimators of eta,
        Lambda and Psi for all components and all layers
    '''
    floor = 1E-16
    numobs = len(Ez_ys[0])

    eta, H, psi = [], [], []
    for l in range(len(E_z1z2T_ys)):
        #===============================================
        # Broadcast the quantities to the right shape
        #===============================================
        paths_shape = (numobs, *k)
        r_cur, r_next = rh[l], rh[l + 1]

        Ez1 = Ez_ys[l].reshape(paths_shape + (r_cur,), order='C')
        Ez2 = Ez_ys[l + 1].reshape(paths_shape + (r_next,), order='C')
        Ez1z2T = E_z1z2T_ys[l].reshape(paths_shape + (r_cur, r_next),
                                       order='C')
        Ez2z2T = E_z2z2T_ys[l].reshape(paths_shape + (r_next, r_next),
                                       order='C')
        EeeT = EeeT_ys[l].reshape(paths_shape + (r_cur, r_cur), order='C')

        #===============================================
        # Sum all the paths going through the layer
        #===============================================
        other_axes = tuple(ax for ax in range(1, L_1Lh) if ax != l + 1)

        Ez1 = Ez1.sum(other_axes)
        Ez2 = Ez2.sum(other_axes)
        Ez1z2T = Ez1z2T.sum(other_axes)
        Ez2z2T = Ez2z2T.sum(other_axes)
        EeeT = EeeT.sum(other_axes)

        weights = ps_y.reshape(paths_shape, order='C').sum(other_axes)
        weights = weights[..., n_axis, n_axis]

        # Common denominator, floored to avoid dividing by zero
        den = weights.sum(0)
        den = np.where(den < floor, floor, den)

        #===============================================
        # eta estimator
        #===============================================
        centered = Ez1[..., n_axis] - H_old[l][n_axis] @ Ez2[..., n_axis]
        eta_new = (weights * centered).sum(0) / den
        eta.append(eta_new)

        #===============================================
        # Lambda estimator
        #===============================================
        cross = Ez1z2T - eta_new[n_axis] @ np.expand_dims(Ez2, 2)
        # The weights are applied before the product with the pseudo-inverse
        weighted_cross = weights * cross
        H.append((weighted_cross @ pinv(Ez2z2T)).sum(0) / den)

        #===============================================
        # Psi estimator
        #===============================================
        psi.append((weights * EeeT).sum(0) / den)

    return eta, H, psi