def LMO_err(params, M=2, verbal=False):
    global Nfeval
    params = np.exp(params)
    al, bl = params[:-1], params[-1]  # params[:int(n_params/2)], params[int(n_params/2):] # [np.exp(e) for e in params]
    if train.x.shape[1] < 5:
        train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN
    else:
        train_L, dev_L = 0, 0
        for i in range(len(al)):
            train_L += train_L0[i] / al[i]**2
        train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN

    tmp_mat = train_L @ eig_vec_K
    C = train_L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 + inv_eig_val) @ tmp_mat.T / N2
    c = C @ W_nystr_Y * N2
    c_y = c - train.y

    lmo_err = 0
    N = 0
    for ii in range(1):
        permutation = np.random.permutation(train.x.shape[0])
        for i in range(0, train.x.shape[0], M):
            indices = permutation[i:i + M]
            K_i = train_W[np.ix_(indices, indices)] * N2
            C_i = C[np.ix_(indices, indices)]
            c_y_i = c_y[indices]
            b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
            lmo_err += b_y.T @ K_i @ b_y
            N += 1
    return lmo_err[0, 0] / M**2
def LMO_err(params, M=10):
    np.random.seed(2)
    random.seed(2)
    al, bl = np.exp(params)
    L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
    if nystr:
        tmp_mat = L @ eig_vec_K
        C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 + inv_eig_val_K) @ tmp_mat.T / N2
        c = C @ W_nystr_Y * N2
    else:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
        C = L @ LWL_inv @ L / N2
        c = C @ W @ Y * N2
    c_y = c - Y

    lmo_err = 0
    N = 0
    for ii in range(1):
        permutation = np.random.permutation(X.shape[0])
        for i in range(0, X.shape[0], M):
            indices = permutation[i:i + M]
            K_i = W[np.ix_(indices, indices)] * N2
            C_i = C[np.ix_(indices, indices)]
            c_y_i = c_y[indices]
            b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
            lmo_err += b_y.T @ K_i @ b_y
            N += 1
    return lmo_err[0, 0] / N / M ** 2
def LMO_err(params, M=2):
    params = np.exp(params)
    al, bl = params[:-1], params[-1]
    L = bl * bl * np.exp(-L0[0] / al[0] / al[0] / 2) + \
        bl * bl * np.exp(-L0[1] / al[1] / al[1] / 2) + 1e-6 * EYEN  # l(X,None,al,bl) # +1e-6*EYEN
    if nystr:
        tmp_mat = L @ eig_vec_K
        C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 + inv_eig_val_K) @ tmp_mat.T / N2
        c = C @ W_nystr_Y * N2
    else:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)  # chol_inv(W*N2+L_inv)
        C = L @ LWL_inv @ L / N2
        c = C @ W @ Y * N2
    c_y = c - Y

    lmo_err = 0
    N = 0
    for ii in range(1):
        permutation = np.random.permutation(X.shape[0])
        for i in range(0, X.shape[0], M):
            indices = permutation[i:i + M]
            K_i = W[np.ix_(indices, indices)] * N2
            C_i = C[np.ix_(indices, indices)]
            c_y_i = c_y[indices]
            b_y = np.linalg.inv(np.eye(C_i.shape[0]) - C_i @ K_i) @ c_y_i
            # print(I_CW_inv.shape, c_y_i.shape)
            lmo_err += b_y.T @ K_i @ b_y
            N += 1
    return lmo_err[0, 0] / N / M**2
def list_permute(X, Y, k, l, n_permute=400, seed=8273):
    """
    Return a numpy array of HSIC's for each permutation.
    This is an implementation where kernel matrices are pre-computed.
    TODO: can be improved.
    """
    if X.shape[0] != Y.shape[0]:
        raise ValueError('X and Y must have the same number of rows (sample size)')
    n = X.shape[0]

    r = 0
    arr_hsic = np.zeros(n_permute)
    K = k.eval(X, X)
    L = l.eval(Y, Y)
    # set the seed
    rand_state = np.random.get_state()
    np.random.seed(seed)
    while r < n_permute:
        # shuffle the order of X, Y while still keeping the original pairs
        ind = np.random.choice(n, n, replace=False)
        Ks = K[np.ix_(ind, ind)]
        #Xs = X[ind]
        #Ys = Y[ind]
        #Ks2 = k.eval(Xs, Xs)
        #assert np.linalg.norm(Ks - Ks2, 'fro') < 1e-4
        Ls = L[np.ix_(ind, ind)]
        Kmean = np.mean(Ks, 0)
        HK = Ks - Kmean
        HKf = HK.flatten() / (n - 1)
        # shift Ys n-1 times
        for s in range(n - 1):
            if r >= n_permute:
                break
            Ls = np.roll(Ls, 1, axis=0)
            Ls = np.roll(Ls, 1, axis=1)

            # compute HSIC
            Lmean = np.mean(Ls, 0)
            HL = Ls - Lmean
            # t = trace(KHLH)
            HLf = HL.T.flatten() / (n - 1)
            bhsic = HKf.dot(HLf)

            arr_hsic[r] = bhsic
            r = r + 1
    # reset the seed back
    np.random.set_state(rand_state)
    return arr_hsic
def scatterMatrixInto(globalMatrix, elementMatrix, locationMap):
    size1 = elementMatrix.shape[0]
    size2 = elementMatrix.shape[1]
    if size1 != size2:
        raise ValueError('Element matrix must be square!')
    if size1 != locationMap.shape[0]:
        raise ValueError(
            'Element matrix and location map size do not correspond! '
            'Make sure location map has size #dof.')
    globalMatrix[numpy.ix_(locationMap, locationMap)] = \
        globalMatrix[numpy.ix_(locationMap, locationMap)] + elementMatrix
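# Illustrative usage sketch (not part of the original source): assembling a
# hypothetical 2x2 element matrix into a 4x4 global matrix with numpy.ix_,
# the way scatterMatrixInto above does it. Values and dof indices are made up.
import numpy

globalMatrix = numpy.zeros((4, 4))
elementMatrix = numpy.array([[2.0, -1.0], [-1.0, 2.0]])
locationMap = numpy.array([1, 3])  # global dof indices of the element's dofs

scatterMatrixInto(globalMatrix, elementMatrix, locationMap)
# Rows/columns 1 and 3 of globalMatrix now hold the element contribution;
# repeated calls accumulate overlapping element matrices additively.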
def gaussian_trig(m, v, i, e=None):
    d = len(m)
    L = len(i)
    e = np.ones((1, L)) if e is None else np.atleast_2d(e)
    ee = np.vstack([e, e]).reshape(1, -1, order='F')

    mi = np.atleast_2d(m[i])
    vi = v[np.ix_(i, i)]
    vii = np.atleast_2d(np.diag(vi))

    M = np.vstack([e * exp(-vii / 2) * sin(mi), e * exp(-vii / 2) * cos(mi)])
    M = M.flatten(order='F')

    lq = -(vii.T + vii) / 2
    q = exp(lq)

    U1 = (exp(lq + vi) - q) * sin(mi.T - mi)
    U2 = (exp(lq - vi) - q) * sin(mi.T + mi)
    U3 = (exp(lq + vi) - q) * cos(mi.T - mi)
    U4 = (exp(lq - vi) - q) * cos(mi.T + mi)

    V = np.vstack([np.hstack([U3 - U4, U1 + U2]),
                   np.hstack([(U1 + U2).T, U3 + U4])])
    V = np.vstack([np.hstack([V[::2, ::2], V[::2, 1::2]]),
                   np.hstack([V[1::2, ::2], V[1::2, 1::2]])])
    V = np.dot(ee.T, ee) * V / 2

    C = np.hstack([np.diag(M[1::2]), -np.diag(M[::2])])
    C = np.hstack([C[:, ::2], C[:, 1::2]])
    C = fill_mat(C, np.zeros((d, 2 * L)), i, None)

    return M, V, C
def test_multivariate_normal_logpdf_batches_and_states_shared_cov_masked(D=10):
    # Test broadcasting over B batches, N datapoints, and K means, 1 covariance, with masks
    B = 3
    N = 100
    K = 5
    x = npr.randn(B, N, D)
    mask = npr.rand(B, N, D) < .5
    mu = npr.randn(K, D)
    L = npr.randn(D, D)
    Sigma = np.dot(L, L.T)

    ll1 = multivariate_normal_logpdf(x[:, :, None, :], mu, Sigma, mask=mask[:, :, None, :])
    assert ll1.shape == (B, N, K)

    ll2 = np.empty((B, N, K))
    for b in range(B):
        for n in range(N):
            m = mask[b, n]
            if m.sum() == 0:
                ll2[b, n] = 0
            else:
                for k in range(K):
                    ll2[b, n, k] = mvn.logpdf(x[b, n][m], mu[k][m], Sigma[np.ix_(m, m)])
    assert np.allclose(ll1, ll2)
def loss_cp(self, m, s):
    D0 = np.size(s, 1)
    D1 = D0 + 2 * len(self.angle)
    M = m
    S = s

    ell = self.p
    Q = np.dot(np.vstack([1, ell]), np.array([[1, ell]]))
    Q = fill_mat(Q, np.zeros((D1, D1)), [0, D0], [0, D0])
    Q = fill_mat(ell**2, Q, [D0 + 1], [D0 + 1])

    target = gaussian_trig(self.target, 0 * s, self.angle)[0]
    target = np.hstack([self.target, target])

    i = np.arange(D0)
    m, s, c = gaussian_trig(M, S, self.angle)
    q = np.dot(S[np.ix_(i, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    w = self.width if hasattr(self, "width") else [1]
    L = np.array([0])
    S2 = np.array(0)
    for i in range(len(w)):
        self.z = target
        self.W = Q / w[i]**2
        r, s2, c = self.loss_sat(M, S)
        L = L + r
        S2 = S2 + s2

    return L / len(w)
def permute(self, perm):
    """
    Permute the discrete latent states.
    """
    self.log_Ps = self.log_Ps[np.ix_(perm, perm)]
    self.mus = self.mus[perm]
    self.sqrt_Sigmas = self.sqrt_Sigmas[perm]
    self.Ws = self.Ws[perm]
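# Minimal sketch (not from the original source) of the np.ix_(perm, perm)
# pattern used by the permute() methods above and below: relabeling discrete
# states permutes the rows and columns of a (log-)transition matrix together.
import numpy as np

log_Ps = np.log(np.array([[0.9, 0.1], [0.3, 0.7]]))
perm = np.array([1, 0])
permuted = log_Ps[np.ix_(perm, perm)]
# permuted[i, j] == log_Ps[perm[i], perm[j]], i.e. the state labels are swapped
# simultaneously on both axes, keeping the matrix a valid transition matrix.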
def permute(self, perm):
    """
    Permute the discrete latent states.
    """
    self.Ps = self.Ps[np.ix_(perm, perm)]
    self.rs = self.rs[perm]
    self.ps = self.ps[perm]

    # Reset the transition matrix
    self._transition_matrix = None
def permutation_list_mmd2_gram(X1, X2, Y1, Y2, k, kx, n_permute=400):
    """
    Repeatedly mix, permute X, Y and compute MMD^2. This is intended to be
    used to approximate the null distribution.
    """
    Y1Y2 = np.vstack((Y1, Y2))
    Ky1y2y1y2 = k.eval(Y1Y2, Y1Y2)

    rand_state = np.random.get_state()
    np.random.seed()

    ny1y2 = Y1Y2.shape[0]
    ny1 = Y1.shape[0]
    ny2 = Y2.shape[0]
    list_mmd2 = np.zeros(n_permute)

    for r in range(n_permute):
        # print r
        ind = np.random.choice(ny1y2, ny1y2, replace=False)
        # divide into new y1, y2
        indy1 = ind[:ny1]
        # print(indy1)
        indy2 = ind[ny1:]
        Ky1 = Ky1y2y1y2[np.ix_(indy1, indy1)]
        # print(Ky1)
        Ky2 = Ky1y2y1y2[np.ix_(indy2, indy2)]
        Ky1y2 = Ky1y2y1y2[np.ix_(indy1, indy2)]
        weights, _ = WQuadMMDTest.kernel_mean_matching(X1, X2, kx)
        Ky1 = np.matmul(np.matmul(np.diag(weights[:, 0]), Ky1), np.diag(weights[:, 0]))
        Ky1y2 = np.matmul(np.diag(weights[:, 0]), Ky1y2)
        mmd2r, var = WQuadMMDTest.h1_mean_var_gram(Ky1, Ky2, Ky1y2, is_var_computed=False)
        list_mmd2[r] = mmd2r

    np.random.set_state(rand_state)
    return list_mmd2
def sd_values(self, config, pos):
    """Returns the values of a given Slater determinant at the positions specified by pos.

    Args:
        config : electronic configuration
        pos : ndarray, shape (N, 3)

    Returns:
        values : ndarray, shape (N, Nbasis)
    """
    mo_vals = self.mo_values(pos)
    return np.linalg.det(mo_vals[np.ix_(config, config)])
def test_multivariate_normal_logpdf_simple_masked(D=10):
    # Test single datapoint log pdf with mask
    x = npr.randn(D)
    mask = npr.rand(D) < 0.5
    mask[0] = True
    mu = npr.randn(D)
    L = npr.randn(D, D)
    Sigma = np.dot(L, L.T)

    ll1 = multivariate_normal_logpdf(x, mu, Sigma, mask=mask)
    ll2 = mvn.logpdf(x[mask], mu[mask], Sigma[np.ix_(mask, mask)])
    assert np.allclose(ll1, ll2)
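# Small standalone sketch (not from the original source) of the masking idiom
# used in the test above: select the observed sub-vector and the matching
# covariance block with np.ix_. Assumes scipy is available; values are made up.
import numpy as np
from scipy.stats import multivariate_normal as mvn

x = np.array([0.3, -1.2, 0.5])
mu = np.zeros(3)
Sigma = np.eye(3) + 0.1           # symmetric positive definite toy covariance
mask = np.array([True, False, True])

ll = mvn.logpdf(x[mask], mu[mask], Sigma[np.ix_(mask, mask)])
# Boolean arrays passed to np.ix_ are converted to the integer indices of their
# True entries, so this extracts the 2x2 observed-observed covariance block.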
def electron_integrals(*args):
    r"""Compute the one- and two-electron integrals in the molecular orbital basis.

    Args:
        args (array[array[float]]): initial values of the differentiable parameters

    Returns:
        tuple[array[float]]: 1D tuple containing core constant, one- and two-electron integrals
    """
    _, coeffs, _, h_core, repulsion_tensor = generate_scf(mol)(*args)
    one = anp.einsum("qr,rs,st->qt", coeffs.T, h_core, coeffs)
    two = anp.swapaxes(
        anp.einsum("ab,cd,bdeg,ef,gh->acfh", coeffs.T, coeffs.T,
                   repulsion_tensor, coeffs, coeffs),
        1,
        3,
    )
    core_constant = nuclear_energy(mol.nuclear_charges, mol.coordinates)(*args)

    if core is None and active is None:
        return core_constant, one, two

    for i in core:
        core_constant = core_constant + 2 * one[i][i]
        for j in core:
            core_constant = core_constant + 2 * two[i][j][j][i] - two[i][j][i][j]

    for p in active:
        for q in active:
            for i in core:
                o = anp.zeros(one.shape)
                o[p, q] = 1.0
                one = one + (2 * two[i][p][q][i] - two[i][p][i][q]) * o

    one = one[anp.ix_(active, active)]
    two = two[anp.ix_(active, active, active, active)]

    return core_constant, one, two
def permutation_list_mmd2_gram(X, Y, wx, wy, k, n_permute=400, seed=8273):
    """
    Repeatedly mix, permute X, Y and compute MMD^2. This is intended to be
    used to approximate the null distribution.
    """
    XY = np.vstack((X, Y))
    wxy = np.vstack((wx, wy))
    Kxyxy = k.eval(XY, XY)  # np.multiply(np.outer(wxy, wxy), k.eval(XY, XY))

    rand_state = np.random.get_state()
    np.random.seed(seed)

    nxy = XY.shape[0]  # nxy = np.sum(wxy)
    nx = X.shape[0]  # nx = np.sum(wx)
    ny = Y.shape[0]  # ny = np.sum(wy)
    list_mmd2 = np.zeros(n_permute)

    for r in range(n_permute):
        # print r
        ind = np.random.choice(nxy, nxy, replace=False)
        # divide into new X, Y
        indx = ind[:nx]
        # print(indx)
        indy = ind[nx:]
        Kx = Kxyxy[np.ix_(indx, indx)]
        # print(Kx)
        Ky = Kxyxy[np.ix_(indy, indy)]
        Kxy = Kxyxy[np.ix_(indx, indy)]
        mmd2r, var = QuadMMDTest.h1_mean_var_gram(Kx, Ky, Kxy, wx, wy, is_var_computed=False)
        list_mmd2[r] = mmd2r

    np.random.set_state(rand_state)
    return list_mmd2
def createNumberingForOrderTemplate(self, p):
    numberOfFields, spaceDim = p.shape
    numbering = []
    for iFieldComponent in range(numberOfFields):
        degreesForFieldComponent = p[0, :] + 1
        if spaceDim == 1:
            numbering.append(numpy.arange(degreesForFieldComponent[0, 0]))
        if spaceDim == 2:
            mapping = numpy.zeros(degreesForFieldComponent, dtype=int)
            pMinusOneR = degreesForFieldComponent[0] - 2
            pMinusOneS = degreesForFieldComponent[1] - 2

            # nodal modes
            mapping[0:2, 0] = [0, 2]
            mapping[0:2, 1] = [1, 3]

            # edge modes
            index = 4
            mapping[2:, 0] = numpy.arange(index, index + pMinusOneR)
            mapping[2:, 1] = numpy.arange(index + pMinusOneR, index + 2 * pMinusOneR)
            index += 2 * pMinusOneR
            mapping[0, 2:] = numpy.arange(index, index + pMinusOneS)
            mapping[1, 2:] = numpy.arange(index + pMinusOneS, index + 2 * pMinusOneS)
            index += 2 * pMinusOneS

            volumeModeIndices = numpy.ix_(
                numpy.arange(2, degreesForFieldComponent[0]),
                numpy.arange(2, degreesForFieldComponent[0]))
            mapping[volumeModeIndices] = numpy.reshape(
                numpy.arange(index, index + pMinusOneR * pMinusOneS),
                (pMinusOneS, pMinusOneR)).T

            numbering.append(mapping)
    return numbering
def concat(con, sat, policy, m, s):
    max_u = policy.max_u
    E = len(max_u)
    D = len(m)
    F = D + E

    i, j = np.arange(D), np.arange(D, F)
    M = m
    S = fill_mat(s, np.zeros((F, F)))

    m, s, c = con(policy, m, s)
    M = np.hstack([M, m])
    S = fill_mat(s, S, j, j)
    q = np.matmul(S[np.ix_(i, i)], c)
    S = fill_mat(q, S, i, j)
    S = fill_mat(q.T, S, j, i)

    M, S, R = sat(M, S, j, max_u)
    C = np.hstack([np.eye(D), c]) @ R
    return M, S, C
def gaussian_sin(m, v, i, e=None):
    d = len(m)
    L = len(i)
    e = np.ones((1, L)) if e is None else np.atleast_2d(e)

    mi = np.atleast_2d(m[i])
    vi = v[np.ix_(i, i)]
    vii = np.atleast_2d(np.diag(vi))

    M = e * exp(-vii / 2) * sin(mi)
    M = M.flatten()

    lq = -(vii.T + vii) / 2
    q = exp(lq)
    V = ((exp(lq + vi) - q) * cos(mi.T - mi) -
         (exp(lq - vi) - q) * cos(mi.T + mi))
    V = np.dot(e.T, e) * V / 2

    C = np.diag((e * exp(-vii / 2) * cos(mi)).flatten())
    C = fill_mat(C, np.zeros((d, L)), i, None)

    return M, V, C
def path_proba_selection(w_s_c_, w_s_d_, k, k_to_keep, new_Lt):
    ''' Utility to update the path probabilities after the selection

    w_s_* (list): The path probabilities starting from the * head
    k (dict): The original number of components on each layer
    k_to_keep (dict): The components selected in the network
    new_Lt (int): The selected number of layers on the common tail
    --------------------------------------------------------------------------
    returns (tuple of size 2): The path probabilities starting from each head
    '''
    # Deal with both heads
    w = {'d': w_s_d_.reshape(*np.concatenate([k['d'], k['t']]), order='C'),
         'c': w_s_c_.reshape(*np.concatenate([k['c'], k['t']]), order='C')}

    for h in ['c', 'd']:
        original_Lh = len(w[h].shape)
        new_Lh = len(k[h]) + new_Lt

        k_to_keep_ht = k_to_keep[h][:new_Lt] + k_to_keep['t']
        assert (len(k_to_keep_ht) == new_Lh)
        new_k_idx_grid = np.ix_(*k_to_keep_ht)

        # If layer deletion, sum the last components of the paths
        # Not checked
        if original_Lh > new_Lh:
            deleted_dims = tuple(range(original_Lh)[new_Lt:])
            w_s = w[h][new_k_idx_grid].sum(deleted_dims).flatten(order='C')
        else:
            w_s = w[h][new_k_idx_grid].flatten(order='C')

        # Renormalization
        w_s /= w_s.sum()
        w[h] = w_s

    w_s_c = w['c']
    w_s_d = w['d']

    return w_s_c, w_s_d
def _cv_beta(kernel_x, kernel_y, kernel_x_params, kernel_y_params, base_density, X, Y, lmbda, split):
    # Fits on k-1 folds of the training dataset and repeats the operation.
    # The output is a tensor beta of shape: k times N times d,
    # where k is the number of folds, N the number of data points in the k-1 folds
    # and d is the dimension of the data.
    n_total, d = Y.shape
    n_train = split[0][0].shape[0]
    n_test = split[0][1].shape[0]

    K_X = kernel_x._kernel(kernel_x_params, X, X)
    G = _compute_G(kernel_y_params, kernel_y, Y, K_X) + n_train * lmbda * np.eye(n_total)
    G = np.linalg.inv(G)

    num_folds = 0
    for train_idx, test_idx in split:
        train_idx_tot = d * np.repeat(train_idx, d) + np.tile(np.array(range(d)), train_idx.shape[0])
        test_idx_tot = d * np.repeat(test_idx, d) + np.tile(np.array(range(d)), test_idx.shape[0])

        h = _compute_h(kernel_y_params, kernel_y, base_density, Y[train_idx, :],
                       K_X[np.ix_(train_idx, train_idx)])
        GG = _compute_G(kernel_y_params, kernel_y, Y[train_idx, :],
                        K_X[np.ix_(train_idx, train_idx)])

        beta = np.matmul(G[np.ix_(train_idx_tot, train_idx_tot)], h)
        h = np.matmul(G[np.ix_(test_idx_tot, train_idx_tot)], h)
        beta_tmp = np.linalg.solve(G[np.ix_(test_idx_tot, test_idx_tot)], h)
        beta -= np.matmul(G[np.ix_(train_idx_tot, test_idx_tot)], beta_tmp)
        beta = beta / lmbda
        beta = np.reshape(beta, [1, -1, d])

        if num_folds == 0:
            betas = 1. * beta
        else:
            betas = np.concatenate([betas, beta], axis=0)
        num_folds += 1

    return betas
def propagate(m, s, plant, dynmodel, policy):
    angi = plant.angi
    poli = plant.poli
    dyni = plant.dyni
    difi = plant.difi

    D0 = len(m)
    D1 = D0 + 2 * len(angi)
    D2 = D1 + len(policy.max_u)
    M = np.array(m)
    S = s

    i, j = np.arange(D0), np.arange(D0, D1)
    m, s, c = gaussian_trig(M[i], S[np.ix_(i, i)], angi)
    q = np.matmul(S[np.ix_(i, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    i, j = poli, np.arange(D1)
    m, s, c = policy.fcn(M[i], S[np.ix_(i, i)])
    q = np.matmul(S[np.ix_(j, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    i, j = np.hstack([dyni, np.arange(D1, D2)]), np.arange(D2)
    m, s, c = dynmodel.fcn(M[i], S[np.ix_(i, i)])
    q = np.matmul(S[np.ix_(j, i)], c)
    M = np.hstack([M, m])
    S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])])

    P = np.hstack([np.zeros((D0, D2)), np.eye(D0)])
    P = fill_mat(np.eye(len(difi)), P, difi, difi)
    M_next = np.matmul(P, M[:, newaxis]).flatten()
    S_next = P @ S @ P.T
    S_next = (S_next + S_next.T) / 2

    return M_next, S_next
def multivariate_normal_logpdf(data, mus, Sigmas, mask=None):
    """
    Compute the log probability density of a multivariate Gaussian distribution.
    This will broadcast as long as data, mus, Sigmas have the same (or at
    least compatible) leading dimensions.

    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density

    mus : array_like (..., D)
        The mean(s) of the Gaussian distribution(s)

    Sigmas : array_like (..., D, D)
        The covariance(s) of the Gaussian distribution(s)

    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed

    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the multivariate Gaussian distribution(s).
    """
    # Check inputs
    D = data.shape[-1]
    assert mus.shape[-1] == D
    assert Sigmas.shape[-2] == Sigmas.shape[-1] == D

    # If there's no mask, we can just use the standard log pdf code
    if mask is None:
        return _multivariate_normal_logpdf(data, mus, Sigmas)

    # Otherwise we need to separate the data into sets with the same mask,
    # since each one will entail a different covariance matrix.
    #
    # First, determine the output shape. Allow mus and Sigmas to
    # have different shapes; e.g. many Gaussians with the same
    # covariance but different means.
    shp1 = np.broadcast(data, mus).shape[:-1]
    shp2 = np.broadcast(data[..., None], Sigmas).shape[:-2]
    assert len(shp1) == len(shp2)
    shp = tuple(max(s1, s2) for s1, s2 in zip(shp1, shp2))

    # Broadcast the data into the full shape
    full_data = np.broadcast_to(data, shp + (D,))

    # Get the full mask
    assert mask.dtype == bool
    assert mask.shape == data.shape
    full_mask = np.broadcast_to(mask, shp + (D,))

    # Flatten the mask and get the unique values
    flat_data = flatten_to_dim(full_data, 1)
    flat_mask = flatten_to_dim(full_mask, 1)
    unique_masks, mask_index = np.unique(flat_mask, return_inverse=True, axis=0)

    # Initialize the output
    lls = np.nan * np.ones(flat_data.shape[0])

    # Compute the log probability for each mask
    for i, this_mask in enumerate(unique_masks):
        this_inds = np.where(mask_index == i)[0]
        this_D = np.sum(this_mask)
        if this_D == 0:
            lls[this_inds] = 0
            continue

        this_data = flat_data[np.ix_(this_inds, this_mask)]
        this_mus = mus[..., this_mask]
        this_Sigmas = Sigmas[np.ix_(
            *[np.ones(sz, dtype=bool) for sz in Sigmas.shape[:-2]],
            this_mask, this_mask)]

        # Precompute the Cholesky decomposition
        this_Ls = np.linalg.cholesky(this_Sigmas)

        # Broadcast mus and Sigmas to full shape and extract the necessary indices
        this_mus = flatten_to_dim(np.broadcast_to(this_mus, shp + (this_D,)), 1)[this_inds]
        this_Ls = flatten_to_dim(np.broadcast_to(this_Ls, shp + (this_D, this_D)), 2)[this_inds]

        # Evaluate the log likelihood
        lls[this_inds] = _multivariate_normal_logpdf(this_data, this_mus, this_Sigmas, Ls=this_Ls)

    # Reshape the output
    assert np.all(np.isfinite(lls))
    return np.reshape(lls, shp)
def DDGMM(y, n_clusters, r, k, init, var_distrib, nj, it=50,
          eps=1E-05, maxstep=100, seed=None, perform_selec=True):
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)

    y (numobs x p ndarray): The observations containing categorical variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: the maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy generated data for the moment)
    perform_selec (bool): Whether to perform architecture selection or not
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''
    prev_lik = -1E16
    best_lik = -1E16
    tol = 0.01
    max_patience = 1
    patience = 0

    best_k = deepcopy(k)
    best_r = deepcopy(r)

    best_sil = -1
    new_sil = -1

    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(init['w_s'])  # Probability of path s' through the network for all s' in Omega

    numobs = len(y)
    likelihood = []
    it_num = 0
    ratio = 1000
    np.random.seed = seed

    # Dispatch variables between categories
    y_bin = y[:, np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')].astype(int)
    nb_bin = len(nj_bin)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)

    y_ord = y[:, var_distrib == 'ordinal']
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)

    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])
    M = M_growth(1, r, numobs)

    assert nb_ord + nb_bin + nb_categ > 0

    # Compute the Gower matrix
    cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli')
    dm = gower_matrix(y, cat_features=cat_features)

    while (it_num < it) & ((ratio > eps) | (patience <= max_patience)):
        print(it_num)

        # The clustering layer is the one used to perform the clustering,
        # i.e. the layer l such that k[l] == n_clusters
        clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################
        ############################# S step ################################
        #####################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================
        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)
        '''
        print('mu_s', np.abs(mu_s[0]).mean())
        print('sigma_s', np.abs(sigma_s[0]).mean())
        print('z_s0', np.abs(z_s[0]).mean())
        print('z_s1', np.abs(z_s[1]).mean(0)[:,0])
        '''

        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================
        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)

        #=======================================================================
        # Compute the p(y | z1) for all variable categories
        #=======================================================================
        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,
                        lambda_categ, y_categ, nj_categ, z_s[0])

        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================
        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)

        #####################################################################
        ############################# E step ################################
        #####################################################################

        #=====================================================================
        # Compute conditional probabilities used in the appendix of asta paper
        #=====================================================================
        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s, py_zl1)
        # del(py_zl1)

        #=====================================================================
        # Compute p(z^{(l)} | s, y). Equation (5) of the paper
        #=====================================================================
        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================
        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)

        #####################################################################
        ############################# M step ################################
        #####################################################################

        #=======================================================
        # Compute MFA Parameters
        #=======================================================
        w_s = np.mean(ps_y, axis=0)
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H, k)

        #=======================================================
        # Identifiability conditions
        #=======================================================
        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
        del(Ez)

        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================
        # We optimize each column separately as it is faster than all columns jointly
        # (and more relevant with the independence hypothesis)
        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],
                                      tol=tol, maxstep=maxstep)
        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],
                                      tol=tol, maxstep=maxstep)
        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],
                                          tol=tol, maxstep=maxstep)

        #####################################################################
        ################## Clustering parameters updating ###################
        #####################################################################
        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        ratio = (new_lik - prev_lik) / abs(prev_lik)
        print(likelihood)

        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order='C').sum(idx_to_sum)
        temp_class = np.argmax(psl_y, axis=1)

        try:
            new_sil = silhouette_score(dm, temp_class, metric='precomputed')
        except ValueError:
            new_sil = -1
        print('Silhouette score:', new_sil)

        if best_sil < new_sil:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)
            fig = plt.figure(figsize=(8, 8))
            plt.scatter(z[:, 0], z[:, 1])
            plt.show()

        # Refresh the classes only if they provide a better explanation of the data
        if best_lik < new_lik:
            best_lik = deepcopy(prev_lik)

        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1

        #####################################################################
        ######################## Parameter selection ########################
        #####################################################################
        is_not_min_specif = not (np.all(np.array(k) == n_clusters) & np.array_equal(r, [2, 1]))

        if look_for_simpler_network(it_num) & perform_selec & is_not_min_specif:

            r_to_keep = r_select(y_bin, y_ord, y_categ, zl1_ys, z2_z1s, w_s)

            # If r_l == 0, delete the last l + 1: layers
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1

            k_to_keep = k_select(w_s, k, new_L, clustering_layer)

            is_L_unchanged = L == new_L
            is_r_unchanged = np.all([len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all([len(k_to_keep[l]) == k[l] for l in range(new_L)])
            is_selection = not (is_r_unchanged & is_k_unchanged & is_L_unchanged)

            assert new_L > 0

            if is_selection:
                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]

                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]

                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]

                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1])
                    lambda_bin = lambda_bin[:, bin_r_to_keep]

                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    lambda_ord_intercept = [lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord]
                    Lambda_ord_var = np.stack([lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])
                                  for j in range(nb_ord)]

                if nb_categ > 0:
                    lambda_categ_intercept = [lambda_categ[j][:, 0] for j in range(nb_categ)]
                    Lambda_categ_var = [lambda_categ_j[:, -r[0]:] for lambda_categ_j in lambda_categ]
                    Lambda_categ_var = [lambda_categ_j[:, r_to_keep[0]] for lambda_categ_j in lambda_categ]
                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])
                                    for j in range(nb_categ)]

                w = w_s.reshape(*k, order='C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])

                # If layer deletion, sum the last components of the paths
                if L > new_L:
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(order='C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order='C')
                w_s /= w_s.sum()

                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]
                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])
                L = new_L

                patience = 0
                best_r = deepcopy(r)
                best_k = deepcopy(k)

                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)

            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S', S)
            print("w_s", len(w_s))

        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1

    out = dict(likelihood=likelihood, classes=classes, z=z,
               best_r=best_r, best_k=best_k)
    return (out)
def toBlocks(mat, d):
    J11 = mat[np.ix_([0, d - 1], [0, d - 1])]
    J12 = mat[np.ix_([0, d - 1], [d, mat.shape[0] - 1])]
    J22 = mat[np.ix_([d, mat.shape[0] - 1], [d, mat.shape[0] - 1])]
    return J11, J12, J22
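# Illustrative example (not from the original source; "d" is assumed to mark
# where the first block ends). Note that toBlocks above picks only the corner
# entries [0, d-1] etc. of each block, not the full sub-blocks, via np.ix_.
import numpy as np

mat = np.arange(36, dtype=float).reshape(6, 6)
J11, J12, J22 = toBlocks(mat, d=3)
# J11 == mat[np.ix_([0, 2], [0, 2])], a 2x2 array of the corner entries of the
# leading 3x3 block; J12 and J22 are built the same way using indices [3, 5].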
def load_celegans_network(props=np.ones((3, 4))):
    """This function loads a connectome with a subsample of the entire connectome.
    The sub-sample is given by props.
    props[i,j] = proportion of neurons of category (i,j) to include
    category i = body position (Head = 0, Middle = 1, Tail = 2)
    category j = neuron type (Sensory = 0, Motor = 1, Interneuron = 2, Poly-type = 3)
    Besides names and positions of neurons, it outputs an array of adjacency
    matrices, one for each type of connectivity (synapse, electric junction and NMJ (?))."""
    NeuronTypeCSV = csv.reader(open('data/NeuronType.csv', 'r'),
                               delimiter=',', skipinitialspace=True)
    neuron_info_all = [[] for index in range(4)]
    relevant_indexes = [0, 1, 2, 14]
    # load relevant information (names, numerical position, anatomical position and type)
    for row in NeuronTypeCSV:
        for j0, j in enumerate(relevant_indexes):
            neuron_info_all[j0].append(row[j].strip(' \t\n\r'))
    names_with_zeros = deepcopy(neuron_info_all[0])
    # erase extra zeros in name
    for j in range(279):
        indZero = neuron_info_all[0][j].find('0')
        if (indZero >= 0 and indZero < len(neuron_info_all[0][j]) - 1):
            neuron_info_all[0][j] = neuron_info_all[0][j].replace('0', '')
    names = deepcopy(neuron_info_all[0])
    xpos = np.array(neuron_info_all[1])
    location = neuron_info_all[2]

    issensory = np.zeros(279)
    ismotor = np.zeros(279)
    isinterneuron = np.zeros(279)

    NeuronTypeISM = csv.reader(open('data/NeuronTypeISM.csv', 'r'),
                               delimiter=',', skipinitialspace=True)
    for row in NeuronTypeISM:
        try:
            index = names.index(row[0])
            words = row[2].lower()
            if ('sensory' in words):
                issensory[index] = 1
            if ('motor' in words):
                ismotor[index] = 1
            if ('interneuron' in words):
                isinterneuron[index] = 1
        except:
            pass

    NeuronRemainingTypesISM = csv.reader(open('data/NeuronRemainingTypesISM.csv', 'r'),
                                         delimiter=',', skipinitialspace=True)
    for row in NeuronRemainingTypesISM:
        try:
            index = neuron_info_all[0].index(row[0])
            words = row[1].lower()
            if ('sensory' in words):
                issensory[index] = 1
            if ('motor' in words):
                ismotor[index] = 1
            if ('interneuron' in words):
                isinterneuron[index] = 1
        except:
            pass

    ConnectomeCSV = csv.reader(open('data/NeuronConnect.csv', 'r'),
                               delimiter=',', skipinitialspace=True)
    As_weighted = np.zeros((3, 279, 279))
    for row in ConnectomeCSV:
        try:
            index1 = names_with_zeros.index(row[0])
            index2 = names_with_zeros.index(row[1])
            if ('S' in row[2] or 'R' in row[2] or 'Sp' in row[2] or 'Rp' in row[2]):
                As_weighted[0, index1, index2] = As_weighted[0, index1, index2] + float(row[3])
            if ('EJ' in row[2]):
                As_weighted[1, index1, index2] = As_weighted[1, index1, index2] + float(row[3])
            if ('NMJ' in row[2]):
                As_weighted[2, index1, index2] = As_weighted[2, index1, index2] + float(row[3])
        except:
            pass
    As = (As_weighted > 0).astype(int)

    ind_type = [[] for _ in range(4)]  # 0=sensory, motor, interneuron, poly
    ind_type[0] = np.where(
        np.logical_and(
            np.logical_and(issensory.astype(bool), (1 - ismotor).astype(bool)),
            (1 - isinterneuron).astype(bool)))[0]
    ind_type[1] = np.where(
        np.logical_and(
            np.logical_and((1 - issensory).astype(bool), ismotor.astype(bool)),
            (1 - isinterneuron).astype(bool)))[0]
    ind_type[2] = np.where(
        np.logical_and(
            np.logical_and((1 - issensory).astype(bool), (1 - ismotor).astype(bool)),
            isinterneuron.astype(bool)))[0]
    ind_type[3] = np.where(issensory + ismotor + isinterneuron >= 2)[0]

    # Head, Middle, Tail
    ind_pos = [[] for _ in range(3)]
    ind_pos[0] = [i for i, j in enumerate(location) if j == 'H']
    ind_pos[1] = [i for i, j in enumerate(location) if j == 'M']
    ind_pos[2] = [i for i, j in enumerate(location) if j == 'T']

    ind_type_pos_number = np.zeros((3, 4))
    ind_type_pos = [[] for _ in range(3)]
    for j in range(3):
        ind_type_pos[j] = [[] for _ in range(4)]
    for i in range(4):
        for j in range(3):
            ind_type_pos[j][i] = [val for val in ind_pos[j] if val in ind_type[i]]
            ind_type_pos_number[j, i] = len(ind_type_pos[j][i])

    ind_neuron_subsampled = [[] for _ in range(3) for _ in range(4)]
    for j in range(3):
        ind_neuron_subsampled[j] = [[] for _ in range(4)]
    for i in range(4):
        for j in range(3):
            try:
                ind_neuron_subsampled[j][i] = np.random.choice(
                    ind_type_pos[j][i],
                    np.floor(ind_type_pos_number[j, i] * props[j, i]).astype(int),
                    replace=False)
            except:
                ind_neuron_subsampled[j][i] = []
    ind_neuron_subsampled = np.sort(
        np.concatenate([
            np.concatenate(ind_neuron_subsampled[j][:], axis=0) for j in range(3)
        ]).astype(int))

    As = As[np.ix_(range(3), ind_neuron_subsampled, ind_neuron_subsampled)]
    xpos = np.array(deepcopy(xpos[ind_neuron_subsampled]).astype(float))
    names = [j for j0, j in enumerate(names) if j0 in ind_neuron_subsampled]

    return As, names, xpos
plant.odei = odei
plant.angi = angi
plant.poli = poli
plant.dyno = dyno
plant.dyni = dyni
plant.difi = difi

m, s, c = gaussian_trig(mu0, S0, angi)
m = np.hstack([mu0, m])
c = np.dot(S0, c)
s = np.vstack([np.hstack([S0, c]), np.hstack([c.T, s])])

policy = GPModel()
policy.max_u = [10]
policy.p = {
    'inputs': multivariate_normal(m[poli], s[np.ix_(poli, poli)], nc),
    'targets': 0.1 * randn(nc, len(policy.max_u)),
    'hyp': log([1, 1, 1, 0.7, 0.7, 1, 0.01])
}

Loss.fcn = loss_cp
cost = Loss()
cost.p = 0.5
cost.gamma = 1
cost.width = [0.25]
cost.angle = plant.angi
cost.target = np.array([0, 0, 0, np.pi])

start = multivariate_normal(mu0, S0)
x, y, L, latent = rollout(start, policy, plant, cost, H)

policy.fcn = lambda m, s: concat(congp, gaussian_sin, policy, m, s)
def boxQP(H, g, lower, upper, x0):
    n = H.shape[0]
    clamped = np.zeros(n)
    free = np.ones(n)
    Hfree = np.zeros(n)
    oldvalue = 0
    result = 0
    nfactor = 0
    clamp = lambda value: np.maximum(lower, np.minimum(upper, value))

    maxIter = 100
    minRelImprove = 1e-8
    minGrad = 1e-8
    stepDec = 0.6
    minStep = 1e-22
    Armijo = 0.1

    if x0.shape[0] == n:
        x = clamp(x0)
    else:
        lu = np.array([lower, upper])
        lu[np.isnan(lu)] = np.nan
        x = np.nanmean(lu, axis=1)

    value = np.dot(x.T, np.dot(H, x)) + np.dot(x.T, g)

    for iteration in range(maxIter):
        if result != 0:
            break

        if iteration > 1 and (oldvalue - value) < minRelImprove * abs(oldvalue):
            result = 4
            logging.info("[QP info] Improvement smaller than tolerance")
            break

        oldvalue = value
        grad = g + np.dot(H, x)

        old_clamped = clamped
        clamped = np.zeros(n)
        clamped[np.logical_and(x == lower, grad > 0)] = 1
        clamped[np.logical_and(x == upper, grad < 0)] = 1
        free = np.logical_not(clamped)

        if np.all(clamped):
            result = 6
            logging.info("[QP info] All dimensions are clamped")
            break

        if iteration == 0:
            factorize = True
        else:
            factorize = np.any(old_clamped != clamped)

        if factorize:
            try:
                if not np.all(np.allclose(H, H.T)):
                    H = np.triu(H)
                Hfree = np.linalg.cholesky(H[np.ix_(free, free)])
            except LinAlgError:
                eigs, _ = np.linalg.eig(H[np.ix_(free, free)])
                print(eigs)
                result = -1
                logging.info("[QP info] Hessian is not positive definite")
                break
            nfactor += 1

        gnorm = np.linalg.norm(grad[free])
        if gnorm < minGrad:
            result = 5
            logging.info("[QP info] Gradient norm smaller than tolerance")
            break

        grad_clamped = g + np.dot(H, x * clamped)
        search = np.zeros(n)
        y = np.linalg.lstsq(Hfree.T, grad_clamped[free])[0]
        search[free] = -np.linalg.lstsq(Hfree, y)[0] - x[free]
        sdotg = np.sum(search * grad)
        if sdotg >= 0:
            print(f"[QP info] No descent direction found. Should not happen. Grad is {grad}")
            break

        # armijo linesearch
        step = 1
        nstep = 0
        xc = clamp(x + step * search)
        vc = np.dot(xc.T, g) + 0.5 * np.dot(xc.T, np.dot(H, xc))
        while (vc - oldvalue) / (step * sdotg) < Armijo:
            step *= stepDec
            nstep += 1
            xc = clamp(x + step * search)
            vc = np.dot(xc.T, g) + 0.5 * np.dot(xc.T, np.dot(H, xc))
            if step < minStep:
                result = 2
                break

        # accept candidate
        x = xc
        value = vc
        # print(f"[QP info] Iteration {iteration}, value of the cost: {vc}")

    if iteration >= maxIter:
        result = 1

    return x, result, Hfree, free
def permute(self, perm):
    """
    Permute the discrete latent states.
    """
    self.log_Ps = self.log_Ps[np.ix_(perm, perm)]
def permute(self, perm):
    self.log_Ps = self.log_Ps[np.ix_(perm, perm)]
    self.weights[-1] = self.weights[-1][:, perm]
    self.biases[-1] = self.biases[-1][perm]
def M1DGMM(y, n_clusters, r, k, init, var_distrib, nj, it=50,
           eps=1E-05, maxstep=100, seed=None, perform_selec=True,
           dm=[], max_patience=1, use_silhouette=True):  # dm: small hack to remove
    ''' Fit a Generalized Linear Mixture of Latent Variables Model (GLMLVM)

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: the maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation steps for each variable
    seed (int): The random state seed to set (only for numpy generated data for the moment)
    perform_selec (bool): Whether to perform architecture selection or not
    use_silhouette (bool): If True use the silhouette as quality criterion (best for clustering),
                           else use the likelihood (best for data augmentation).
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    prev_lik = -1E16
    best_lik = -1E16
    best_sil = -1
    new_sil = -1

    tol = 0.01
    patience = 0
    is_looking_for_better_arch = False

    # Initialize the parameters
    eta = deepcopy(init['eta'])
    psi = deepcopy(init['psi'])
    lambda_bin = deepcopy(init['lambda_bin'])
    lambda_ord = deepcopy(init['lambda_ord'])
    lambda_cont = deepcopy(init['lambda_cont'])
    lambda_categ = deepcopy(init['lambda_categ'])

    H = deepcopy(init['H'])
    w_s = deepcopy(init['w_s'])  # Probability of path s' through the network for all s' in Omega

    numobs = len(y)
    likelihood = []
    silhouette = []
    it_num = 0
    ratio = 1000
    np.random.seed = seed
    out = {}  # Store the full output

    # Dispatch variables between categories
    y_bin = y[:, np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')]
    nj_bin = nj[np.logical_or(var_distrib == 'bernoulli', var_distrib == 'binomial')].astype(int)
    nb_bin = len(nj_bin)

    y_ord = y[:, var_distrib == 'ordinal']
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nb_ord = len(nj_ord)

    y_categ = y[:, var_distrib == 'categorical']
    nj_categ = nj[var_distrib == 'categorical'].astype(int)
    nb_categ = len(nj_categ)

    y_cont = y[:, var_distrib == 'continuous'].astype(float)
    nb_cont = y_cont.shape[1]

    # Set y_cont standard error to 1
    y_cont = y_cont / y_cont.std(axis=0, keepdims=True)

    L = len(k)
    k_aug = k + [1]
    S = np.array([np.prod(k_aug[l:]) for l in range(L + 1)])
    M = M_growth(1, r, numobs)

    assert nb_bin + nb_ord + nb_cont + nb_categ > 0
    if nb_bin + nb_ord + nb_cont + nb_categ != len(var_distrib):
        raise ValueError('Some variable types were not understood, '
                         'existing types are: continuous, categorical, '
                         'ordinal, binomial and bernoulli')

    # Compute the Gower matrix
    if len(dm) == 0:
        cat_features = np.logical_or(var_distrib == 'categorical', var_distrib == 'bernoulli')
        dm = gower_matrix(y, cat_features=cat_features)

    # Do not stop the iterations if there are some iterations left or if the likelihood is increasing
    # or if we have not reached the maximum patience and if a new architecture was looked for
    # in the previous iteration
    while ((it_num < it) & (ratio > eps) & (patience <= max_patience)) | is_looking_for_better_arch:
        print(it_num)

        # The clustering layer is the one used to perform the clustering,
        # i.e. the layer l such that k[l] == n_clusters
        if not(isnumeric(n_clusters)):
            if n_clusters == 'auto':
                clustering_layer = 0
            else:
                raise ValueError('Please enter an int or "auto" for n_clusters')
        else:
            assert (np.array(k) == n_clusters).any()
            clustering_layer = np.argmax(np.array(k) == n_clusters)

        #####################################################################
        ############################# S step ################################
        #####################################################################

        #=====================================================================
        # Draw from f(z^{l} | s, Theta) for all s in Omega
        #=====================================================================
        mu_s, sigma_s = compute_path_params(eta, H, psi)
        sigma_s = ensure_psd(sigma_s)
        z_s, zc_s = draw_z_s(mu_s, sigma_s, eta, M)

        #========================================================================
        # Draw from f(z^{l+1} | z^{l}, s, Theta) for l >= 1
        #========================================================================
        chsi = compute_chsi(H, psi, mu_s, sigma_s)
        chsi = ensure_psd(chsi)
        rho = compute_rho(eta, H, psi, mu_s, sigma_s, zc_s, chsi)

        # In the following z2 and z1 will denote z^{l+1} and z^{l} respectively
        z2_z1s = draw_z2_z1s(chsi, rho, M, r)

        #=======================================================================
        # Compute the p(y | z1) for all variable categories
        #=======================================================================
        py_zl1 = fy_zl1(lambda_bin, y_bin, nj_bin, lambda_ord, y_ord, nj_ord,
                        lambda_categ, y_categ, nj_categ, y_cont, lambda_cont, z_s[0])

        #========================================================================
        # Draw from p(z1 | y, s) proportional to p(y | z1) * p(z1 | s) for all s
        #========================================================================
        zl1_ys = draw_zl1_ys(z_s, py_zl1, M)

        #####################################################################
        ############################# E step ################################
        #####################################################################

        #=====================================================================
        # Compute conditional probabilities used in the appendix of asta paper
        #=====================================================================
        pzl1_ys, ps_y, p_y = E_step_GLLVM(z_s[0], mu_s[0], sigma_s[0], w_s, py_zl1)

        #=====================================================================
        # Compute p(z^{(l)} | s, y). Equation (5) of the paper
        #=====================================================================
        pz2_z1s = fz2_z1s(t(pzl1_ys, (1, 0, 2)), z2_z1s, chsi, rho, S)
        pz_ys = fz_ys(t(pzl1_ys, (1, 0, 2)), pz2_z1s)

        #=====================================================================
        # Compute MFA expectations
        #=====================================================================
        Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys = \
            E_step_DGMM(zl1_ys, H, z_s, zc_s, z2_z1s, pz_ys, pz2_z1s, S)

        #####################################################################
        ############################# M step ################################
        #####################################################################

        #=======================================================
        # Compute MFA Parameters
        #=======================================================
        w_s = np.mean(ps_y, axis=0)
        eta, H, psi = M_step_DGMM(Ez_ys, E_z1z2T_ys, E_z2z2T_ys, EeeT_ys, ps_y, H, k)

        #=======================================================
        # Identifiability conditions
        #=======================================================
        # Update eta, H and Psi values
        H = diagonal_cond(H, psi)
        Ez, AT = compute_z_moments(w_s, eta, H, psi)
        eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
        del(Ez)

        #=======================================================
        # Compute GLLVM Parameters
        #=======================================================
        lambda_bin = bin_params_GLLVM(y_bin, nj_bin, lambda_bin, ps_y, pzl1_ys, z_s[0], AT[0],
                                      tol=tol, maxstep=maxstep)
        lambda_ord = ord_params_GLLVM(y_ord, nj_ord, lambda_ord, ps_y, pzl1_ys, z_s[0], AT[0],
                                      tol=tol, maxstep=maxstep)
        lambda_categ = categ_params_GLLVM(y_categ, nj_categ, lambda_categ, ps_y, pzl1_ys, z_s[0], AT[0],
                                          tol=tol, maxstep=maxstep)
        lambda_cont = cont_params_GLLVM(y_cont, lambda_cont, ps_y, pzl1_ys, z_s[0], AT[0],
                                        tol=tol, maxstep=maxstep)

        #####################################################################
        ################## Clustering parameters updating ###################
        #####################################################################
        new_lik = np.sum(np.log(p_y))
        likelihood.append(new_lik)
        silhouette.append(new_sil)
        ratio = abs((new_lik - prev_lik) / prev_lik)

        idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
        psl_y = ps_y.reshape(numobs, *k, order='C').sum(idx_to_sum)
        temp_class = np.argmax(psl_y, axis=1)

        try:
            new_sil = silhouette_score(dm, temp_class, metric='precomputed')
        except ValueError:
            new_sil = -1

        # Store the params according to the silhouette or likelihood
        is_better = (best_sil < new_sil) if use_silhouette else (best_lik < new_lik)

        if is_better:
            z = (ps_y[..., n_axis] * Ez_ys[clustering_layer]).sum(1)
            best_sil = deepcopy(new_sil)
            classes = deepcopy(temp_class)
            '''
            plt.figure(figsize=(8,8))
            plt.scatter(z[:, 0], z[:, 1], c = classes)
            plt.show()
            '''

            # Store the output
            out['classes'] = deepcopy(classes)
            out['best_z'] = deepcopy(z_s[0])
            out['Ez.y'] = z
            out['best_k'] = deepcopy(k)
            out['best_r'] = deepcopy(r)
            out['best_w_s'] = deepcopy(w_s)
            out['lambda_bin'] = deepcopy(lambda_bin)
            out['lambda_ord'] = deepcopy(lambda_ord)
            out['lambda_categ'] = deepcopy(lambda_categ)
            out['lambda_cont'] = deepcopy(lambda_cont)
            out['eta'] = deepcopy(eta)
            out['mu'] = deepcopy(mu_s)
            out['sigma'] = deepcopy(sigma_s)
            out['psl_y'] = deepcopy(psl_y)
            out['ps_y'] = deepcopy(ps_y)

        # Refresh the classes only if they provide a better explanation of the data
        if best_lik < new_lik:
            best_lik = deepcopy(prev_lik)

        if prev_lik < new_lik:
            patience = 0
            M = M_growth(it_num + 2, r, numobs)
        else:
            patience += 1

        #####################################################################
        ######################## Parameter selection ########################
        #####################################################################
        min_nb_clusters = 2

        if isnumeric(n_clusters):  # To change when add multi mode
            is_not_min_specif = not(np.all(np.array(k) == n_clusters) & np.array_equal(r, [2, 1]))
        else:
            is_not_min_specif = not(np.all(np.array(k) == min_nb_clusters) & np.array_equal(r, [2, 1]))

        is_looking_for_better_arch = look_for_simpler_network(it_num) & perform_selec & is_not_min_specif
        if is_looking_for_better_arch:

            r_to_keep = r_select(y_bin, y_ord, y_categ, y_cont, zl1_ys, z2_z1s, w_s)

            # If r_l == 0, delete the last l + 1: layers
            new_L = np.sum([len(rl) != 0 for rl in r_to_keep]) - 1

            k_to_keep = k_select(w_s, k, new_L, clustering_layer, not(isnumeric(n_clusters)))

            is_L_unchanged = (L == new_L)
            is_r_unchanged = np.all([len(r_to_keep[l]) == r[l] for l in range(new_L + 1)])
            is_k_unchanged = np.all([len(k_to_keep[l]) == k[l] for l in range(new_L)])
            is_selection = not(is_r_unchanged & is_k_unchanged & is_L_unchanged)

            assert new_L > 0

            if is_selection:
                eta = [eta[l][k_to_keep[l]] for l in range(new_L)]
                eta = [eta[l][:, r_to_keep[l]] for l in range(new_L)]

                H = [H[l][k_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, r_to_keep[l]] for l in range(new_L)]
                H = [H[l][:, :, r_to_keep[l + 1]] for l in range(new_L)]

                psi = [psi[l][k_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, r_to_keep[l]] for l in range(new_L)]
                psi = [psi[l][:, :, r_to_keep[l]] for l in range(new_L)]

                if nb_bin > 0:
                    # Add the intercept:
                    bin_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1])
                    lambda_bin = lambda_bin[:, bin_r_to_keep]

                if nb_ord > 0:
                    # Intercept coefficients handling is a little more complicated here
                    lambda_ord_intercept = [lambda_ord_j[:-r[0]] for lambda_ord_j in lambda_ord]
                    Lambda_ord_var = np.stack([lambda_ord_j[-r[0]:] for lambda_ord_j in lambda_ord])
                    Lambda_ord_var = Lambda_ord_var[:, r_to_keep[0]]
                    lambda_ord = [np.concatenate([lambda_ord_intercept[j], Lambda_ord_var[j]])
                                  for j in range(nb_ord)]

                # To recheck
                if nb_cont > 0:
                    # Add the intercept:
                    cont_r_to_keep = np.concatenate([[0], np.array(r_to_keep[0]) + 1])
                    lambda_cont = lambda_cont[:, cont_r_to_keep]

                if nb_categ > 0:
                    lambda_categ_intercept = [lambda_categ[j][:, 0] for j in range(nb_categ)]
                    Lambda_categ_var = [lambda_categ_j[:, -r[0]:] for lambda_categ_j in lambda_categ]
                    Lambda_categ_var = [lambda_categ_j[:, r_to_keep[0]] for lambda_categ_j in lambda_categ]
                    lambda_categ = [np.hstack([lambda_categ_intercept[j][..., n_axis], Lambda_categ_var[j]])
                                    for j in range(nb_categ)]

                w = w_s.reshape(*k, order='C')
                new_k_idx_grid = np.ix_(*k_to_keep[:new_L])

                # If layer deletion, sum the last components of the paths
                if L > new_L:
                    deleted_dims = tuple(range(L)[new_L:])
                    w_s = w[new_k_idx_grid].sum(deleted_dims).flatten(order='C')
                else:
                    w_s = w[new_k_idx_grid].flatten(order='C')
                w_s /= w_s.sum()

                # Refresh the classes: TO RECHECK
                # idx_to_sum = tuple(set(range(1, L + 1)) - set([clustering_layer + 1]))
                # ps_y_tmp = ps_y.reshape(numobs, *k, order = 'C').sum(idx_to_sum)
                # np.argmax(ps_y_tmp[:, k_to_keep[0]], axis = 1)

                k = [len(k_to_keep[l]) for l in range(new_L)]
                r = [len(r_to_keep[l]) for l in range(new_L + 1)]
                k_aug = k + [1]
                S = np.array([np.prod(k_aug[l:]) for l in range(new_L + 1)])
                L = new_L

                patience = 0

                # Identifiability conditions
                H = diagonal_cond(H, psi)
                Ez, AT = compute_z_moments(w_s, eta, H, psi)
                eta, H, psi = identifiable_estim_DGMM(eta, H, psi, Ez, AT)
                del(Ez)

            print('New architecture:')
            print('k', k)
            print('r', r)
            print('L', L)
            print('S', S)
            print("w_s", len(w_s))

        prev_lik = deepcopy(new_lik)
        it_num = it_num + 1
        print(likelihood)
        print(silhouette)

    out['likelihood'] = likelihood
    out['silhouette'] = silhouette

    return(out)