def clusterize_mat(X, n_clusters, reord_mat=False, reord_method='eta-trick'):
    """Split the similarity matrix X into n_clusters contiguous blocks found
    with the Fiedler vector, project X onto the associated block-diagonal
    matrix, and (optionally) reorder each block with the chosen method.
    """
    if reord_mat:
        if reord_method == 'eta-trick':
            my_method = SpectralEtaTrick(n_iter=10)
        elif reord_method == 'mdso':
            my_method = SpectralOrdering()
        else:
            my_method = SpectralBaseline()

    ebd = spectral_embedding(X - X.min(), norm_laplacian='random_walk',
                             norm_adjacency=False)

    N = X.shape[0]
    if n_clusters == 1:
        if reord_mat:
            return(X, np.arange(N))
        else:
            return(X)
    else:
        # Breakpoints: the (n_clusters - 1) largest gaps in the Fiedler vector.
        fied_vec = ebd[:, 0]
        fied_diff = abs(fied_vec[1:] - fied_vec[:-1])
        bps = np.append(0, np.argsort(-fied_diff)[:n_clusters - 1])
        bps = np.append(bps, N)
        bps = np.sort(bps)

        x_flat = X.flatten()
        s_clus = np.zeros(N**2)
        if reord_mat:
            permu = np.zeros(0, dtype='int32')
        for k_ in range(n_clusters):
            in_clst = np.arange(bps[k_], bps[k_ + 1])
            if not in_clst.size:
                print("empty cluster!")
                continue
            iis = np.repeat(in_clst, len(in_clst))
            jjs = np.tile(in_clst, len(in_clst))
            sub_idx = np.ravel_multi_index((iis, jjs), (N, N))
            s_clus[sub_idx] = x_flat[sub_idx]  # projection on block matrices

            if reord_mat:
                # Reorder the submatrix restricted to the current cluster.
                sub_mat = X.copy()[in_clst, :]
                sub_mat = sub_mat.T[in_clst, :].T
                sub_perm = my_method.fit_transform(sub_mat - sub_mat.min())
                sub_cc = in_clst[sub_perm]
                permu = np.append(permu, sub_cc)

        S_clus = np.reshape(s_clus, (N, N))
        if reord_mat:
            return(S_clus, permu)
        else:
            return(S_clus)
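# ---------------------------------------------------------------------------
# Hypothetical usage sketch for clusterize_mat (not part of the original
# module): build a noisy block-diagonal similarity matrix and recover its
# blocks. Assumes SpectralEtaTrick and spectral_embedding from this module
# are in scope; only numpy is imported here.
def _demo_clusterize_mat():
    import numpy as np
    rng = np.random.default_rng(0)
    n, k = 60, 3
    # Planted structure: 3 blocks of 20 items, plus small off-block noise.
    X = np.kron(np.eye(k), np.ones((n // k, n // k)))
    X += 0.1 * rng.random((n, n))
    X = (X + X.T) / 2  # symmetrize
    S_blocks, perm = clusterize_mat(X, n_clusters=k, reord_mat=True)
    assert S_blocks.shape == (n, n) and len(perm) == n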
def ser_dupli_alt_clust2(A, C, seriation_solver='eta-trick', n_iter=100,
                         n_clusters=8, do_strong=False,
                         include_main_diag=True, do_show=True, Z_true=None):
    """Seriation with duplicated items: alternate between reordering S_t,
    projecting on R-matrices, projecting on block-diagonal matrices, and
    projecting back on duplication-consistent matrices.
    """
    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert(n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:  # use the basic spectral algorithm from Atkins et al.
        my_solver = SpectralBaseline()

    # Initialization: Z[i, :] selects the C[i] duplicates of item i.
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):  # TODO: make this faster?
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]

    dc = np.diag(1. / C)
    S_t = Z.T @ dc @ A @ dc @ Z
    max_val = A.max()

    perm_tot = np.arange(N)

    # Iterate
    for it in range(n_iter):
        # Reorder the matrix
        permu = my_solver.fit_transform(S_t)
        S_tp = S_t.copy()[permu, :]
        S_tp = S_tp.T[permu, :].T

        # Projection on the set of R-matrices
        R_t = proj2Rmat(S_tp, do_strong=do_strong,
                        include_main_diag=include_main_diag, verbose=0,
                        u_b=max_val)
        R_t -= R_t.min()  # make sure it is non-negative after linprog

        ebd = spectral_embedding(R_t, norm_laplacian=False)

        if n_clusters > 1:
            # Breakpoints: the (n_clusters - 1) largest gaps in the Fiedler
            # vector, as in clusterize_mat.
            fied_vec = ebd[:, 0]
            fied_diff = abs(fied_vec[1:] - fied_vec[:-1])
            bps = np.append(0, np.argsort(-fied_diff)[:n_clusters - 1])
            bps = np.append(bps, N)
            bps = np.sort(bps)
        else:
            bps = np.array([0, N])

        Z = Z[:, permu]

        # Projection on block-diagonal matrices (per-cluster reordering is
        # disabled here: permu2 concatenates the clusters in order).
        s_clus = np.zeros(N**2)  # TODO: adapt to the sparse case
        s_flat = R_t.flatten()
        permu2 = np.zeros(0, dtype='int32')
        for k_ in range(len(bps) - 1):
            in_clst = np.arange(bps[k_], bps[k_ + 1])
            permu2 = np.append(permu2, in_clst)
            iis = np.repeat(in_clst, len(in_clst))
            jjs = np.tile(in_clst, len(in_clst))
            sub_idx = np.ravel_multi_index((iis, jjs), (N, N))
            s_clus[sub_idx] = s_flat[sub_idx]  # projection on block matrices

        # Convex combination with the previous iterate (alpha_ = 0 keeps only
        # the block-diagonal projection).
        alpha_ = 0.
        S_clus = (1 - alpha_) * np.reshape(s_clus, (N, N)) + alpha_ * S_t

        S_tp = S_clus.copy()[permu2, :]
        S_tp = S_tp.T[permu2, :].T

        double_perm = permu[permu2]
        Z = Z[:, permu2]
        perm_tot = perm_tot[double_perm]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                (mean_dist, _, _) = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
            visualize_mat(S_t, S_tp, R_t, Z, ebd, title, Z_true=Z_true)

        # Projection on duplication-consistent matrices
        S_t = proj2dupli(S_tp, Z, A, u_b=max_val, k_sparse=None,
                         include_main_diag=include_main_diag)

    return(S_t, Z, R_t)
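# ---------------------------------------------------------------------------
# Worked sketch (not in the original module) of the initialization step of
# ser_dupli_alt_clust2: the assignment matrix Z expands an n x n similarity A
# into an N x N one when item i appears C[i] times, via S_0 = Z.T dc A dc Z.
def _demo_duplication_init():
    import numpy as np
    A = np.array([[1.0, 0.5],
                  [0.5, 1.0]])
    C = np.array([1, 2])        # item 1 is duplicated twice, so N = 3
    n_, N = len(C), int(C.sum())
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):        # same loop as in ser_dupli_alt_clust2
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)
    S_0 = Z.T @ dc @ A @ dc @ Z
    # S_0 is 3 x 3: each duplicate inherits the (rescaled) similarities of
    # its parent item.
    assert S_0.shape == (N, N)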
def spectral_eta_trick2(X, n_iter=50, dh=1, p=1, return_score=False,
                        do_plot=False, circular=False, norm_laplacian=None,
                        norm_adjacency=None, eigen_solver=None,
                        scale_embedding=False, add_momentum=None):
    """
    Performs the Spectral Eta-trick algorithm from
    https://arxiv.org/pdf/1806.00664.pdf, which calls several instances of the
    spectral ordering baseline (Atkins) to try to minimize 1-SUM or Huber-SUM
    (instead of 2-SUM) with the so-called eta-trick.
    """
    (n, n2) = X.shape
    assert(n == n2)

    if n < 3:
        best_perm = np.arange(n)
        if return_score:
            return(best_perm, -1)
        else:
            return(best_perm)

    spectral_algo = SpectralBaseline(circular=circular,
                                     norm_laplacian=norm_laplacian,
                                     norm_adjacency=norm_adjacency,
                                     eigen_solver=eigen_solver,
                                     scale_embedding=scale_embedding)

    best_perm = np.arange(n)
    best_score = n**(p+2)  # upper bound on the p-SUM score

    if issparse(X):
        if not isinstance(X, coo_matrix):
            X = coo_matrix(X)

        r, c, v = X.row, X.col, X.data
        eta_vec = np.ones(len(v))
        if add_momentum:
            eta_old = np.ones(len(v))  # TODO: momentum update not implemented

        for it in range(n_iter):
            X_w = X.copy()
            X_w.data /= eta_vec

            embedding = spectral_embedding(X_w)
            new_perm = np.argsort(embedding[:, 0])
            # new_perm = spectral_algo.fit_transform(X_w)
            if np.all(new_perm == best_perm):
                break
            if new_perm[0] > new_perm[-1]:  # canonical orientation
                embedding = embedding[::-1, :]
                new_perm *= -1
                new_perm += (n-1)

            new_score = p_sum_score(X, permut=new_perm, p=p)
            if new_score < best_score:
                best_perm = new_perm
                best_score = new_score

            # eta_vec = abs(p_inv[r] - p_inv[c])  # plain eta-trick (d_ = 1)
            d_ = 3
            eta_vec = np.sum(abs(embedding[r, :d_] - embedding[c, :d_]),
                             axis=1)

            if do_plot:
                title = "it %d, %d-SUM: %1.5e" % (it, p, new_score)
                plot_mat(X, permut=new_perm, title=title)

    else:
        eta_mat = np.ones((n, n))
        for it in range(n_iter):
            X_w = np.divide(X, eta_mat)

            embedding = spectral_embedding(X_w)
            new_perm = np.argsort(embedding[:, 0])
            # new_perm = spectral_algo.fit_transform(X_w)

            new_score = p_sum_score(X, permut=new_perm, p=p)
            if new_score < best_score:
                best_perm = new_perm
                best_score = new_score

            # Sum the rank differences over the first d_ dimensions of the
            # embedding, the dim-th weighted by 1/(1 + dim).
            d_ = min(n-1, 5)
            eta_mat = np.identity(n).flatten()
            for dim in range(d_):
                d_perm = np.argsort(embedding[:, dim])
                d_perm = (1./(1 + dim)) * np.argsort(d_perm)
                eta_mat = eta_mat + abs(np.tile(d_perm, n) -
                                        np.repeat(d_perm, n))
            eta_mat = np.reshape(eta_mat, (n, n))

            if do_plot:
                title = "it %d, %d-SUM: %1.5e" % (it, p, new_score)
                plot_mat(X, permut=new_perm, title=title)

    if return_score:
        return(best_perm, best_score)
    else:
        return(best_perm)
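# ---------------------------------------------------------------------------
# Minimal sketch (not in the original module) of the plain eta-trick
# reweighting used above in the one-dimensional case: entries already far
# apart in the current ordering are down-weighted before re-embedding.
def _demo_eta_trick_step():
    import numpy as np
    rng = np.random.default_rng(0)
    n = 5
    X = rng.random((n, n))
    X = (X + X.T) / 2
    perm = np.arange(n)                 # current ordering
    p_inv = np.argsort(perm)            # position (rank) of each index
    eta = np.abs(p_inv[:, None] - p_inv[None, :])
    eta = np.maximum(1, eta)            # dh = 1 floor, avoids dividing by 0
    X_w = X / eta                       # reweighted matrix fed to the solver
    assert X_w.shape == (n, n)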
def spectral_eta_trick3(X, n_iter=50, dh=1, score_function='Huber',
                        return_score=False, do_plot=False, circular=False,
                        norm_laplacian=None, norm_adjacency=None,
                        eigen_solver=None, scale_embedding=False,
                        add_momentum=None, avg_dim=1, avg_scaling=True):
    """
    Performs the Spectral Eta-trick algorithm from
    https://arxiv.org/pdf/1806.00664.pdf, which calls several instances of the
    spectral ordering baseline (Atkins) to try to minimize 1-SUM or Huber-SUM
    (instead of 2-SUM) with the so-called eta-trick.

    Parameters
    ----------
    n_iter : int, default 50
        Number of iterations.

    score_function : string, default 'Huber'
        Which score we aim to minimize. Either '1SUM', '2SUM', 'Huber', or
        'R2S' (robust 2-SUM function from the paper). If 'Huber' or 'R2S',
        it is computed with the parameter dh provided.
        By design, the algorithm seeks to minimize the Huber loss. However,
        we keep the permutation that yields the best score amongst all,
        according to the score computed with score_function.

    dh : int, default 1
        Parameter of the Huber loss minimized.

    circular : boolean, default False
        Whether we wish to find a circular or a linear ordering.

    eigen_solver : string, default None
        Solver for the eigenvector computations. Can be 'arpack', 'amg', or
        'lobpcg'. 'amg' is faster for large sparse matrices but requires the
        pyamg package.

    add_momentum : Nonetype or float, default None
        gamma parameter in Algorithm ... from the paper.
        If gamma > 0, we set eta_{t+1} = gamma * eta_t + (1 - gamma) * eta^*,
        where eta^* is the solution at iteration t.

    avg_dim : int, default 1
        Number of dimensions of the spectral embedding to use.
        If avg_dim = 1, this is the regular eta-trick with
        eta = |pi_i - pi_j|. If avg_dim > 1, we instead sum
        |pi^k_i - pi^k_j| over the first avg_dim dimensions, where pi^k is
        the permutation that sorts the coordinates of the k-th dimension of
        the spectral embedding (not just the first, which is the Fiedler
        vector).

    avg_scaling : boolean, default True
        If avg_dim > 1, weight the k-th term of the above sum by
        1/sqrt(1 + k).

    return_score : boolean, default False
        Whether to also return the best score (computed with score_function).

    norm_laplacian : string, default None
        Type of normalization of the Laplacian. Can be "unnormalized",
        "random_walk", or "symmetric".

    norm_adjacency : str or bool, default None
        If 'coifman', use the normalization of the similarity matrix,
        W = Dinv @ W @ Dinv, to account for non-uniform sampling of points on
        a 1d manifold (from Lafon and Coifman's approximation of the
        Laplace-Beltrami operator). Otherwise, leave the adjacency matrix
        as it is.
        TODO: also implement the 'sinkhorn' normalization.

    scale_embedding : string or boolean, default False
        If False, the embedding is just the concatenation of the
        eigenvectors of the Laplacian, i.e., all dimensions have the same
        weight. If "CTD", the k-th dimension of the spectral embedding (k-th
        eigenvector) is re-scaled by 1/sqrt(lambda_k), in relation with the
        commute-time distance. If True or any other string, the heuristic
        scaling 1/sqrt(k) is used instead.
""" (n, n2) = X.shape assert(n == n2) if n < 3: best_perm = np.arange(n) if return_score: return(best_perm, -1) else: return(best_perm) best_perm = np.arange(n) best_score = compute_score(X, score_function=score_function, dh=dh, perm=None) if issparse(X): if not isinstance(X, coo_matrix): X = coo_matrix(X) r, c, v = X.row, X.col, X.data eta_vec = np.ones(len(v)) if add_momentum: eta_old = np.ones(len(v)) for it in range(n_iter): X_w = X.copy() X_w.data /= eta_vec default_dim = 8 if avg_dim > default_dim: default_dim = avg_dim + 1 embedding = spectral_embedding(X_w, norm_laplacian=norm_laplacian, norm_adjacency=norm_adjacency, eigen_solver=eigen_solver, scale_embedding=scale_embedding, n_components=default_dim) new_perm = np.argsort(embedding[:, 0]) # new_perm = spectral_algo.fit_transform(X_w) if np.all(new_perm == best_perm): break if new_perm[0] > new_perm[-1]: embedding = embedding[::-1, :] new_perm *= -1 new_perm += (n-1) new_score = compute_score(X, score_function=score_function, dh=dh, perm=new_perm) if new_score < best_score: best_perm = new_perm p_inv = np.argsort(new_perm) # eta_vec = abs(p_inv[r] - p_inv[c]) eta_vec = np.zeros(len(r)) d_ = min(avg_dim, n-1) for dim in range(d_): # eta_mat = eta_mat + abs(np.tile(embedding[:, dim], n) - np.repeat(embedding[:, dim], n)) d_perm = np.argsort(embedding[:, dim]) d_perm = np.argsort(d_perm) eta_add = abs(d_perm[r] - d_perm[c]) if circular: eta_add = np.minimum(eta_add, n - eta_add) eta_add = np.maximum(dh, eta_add) if avg_scaling: eta_add = eta_add * 1./np.sqrt(1 + dim) eta_vec += eta_add # eta_mat = eta_mat + abs(np.tile(d_perm, n) - np.repeat(d_perm, n)) # eta_vec = np.sum(abs(embedding[r, :d_] - embedding[c, :d_]), axis=1) # if circular: # # pass # eta_vec = np.minimum(eta_vec, n - eta_vec) # eta_vec = np.maximum(dh, eta_vec) if do_plot: title = "it %d, score: %1.5e" % (it, new_score) plot_mat(X, permut=new_perm, title=title) else: eta_mat = np.ones((n, n)) for it in range(n_iter): X_w = np.divide(X, eta_mat) default_dim = 8 if avg_dim > default_dim: default_dim = avg_dim + 1 embedding = spectral_embedding(X_w, norm_laplacian=norm_laplacian, norm_adjacency=norm_adjacency, eigen_solver=eigen_solver, scale_embedding=scale_embedding, n_components=default_dim) new_perm = np.argsort(embedding[:, 0]) # new_perm = spectral_algo.fit_transform(X_w) # if new_perm[0] > new_perm[-1]: # embedding = embedding[::-1, :] # new_perm *= -1 # new_perm += (n-1) # if np.all(new_perm == best_perm): # break new_score = compute_score(X, score_function=score_function, dh=dh, perm=new_perm) if new_score < best_score: best_perm = new_perm p_inv = np.argsort(new_perm) d_ = min(avg_dim, n-1) # eta_vec = np.sum(abs(embedding[r, :d_] - embedding[c, :d_]), axis=1) eta_mat = np.identity(n).flatten() for dim in range(d_): # eta_mat = eta_mat + abs(np.tile(embedding[:, dim], n) - np.repeat(embedding[:, dim], n)) d_perm = np.argsort(embedding[:, dim]) d_perm = np.argsort(d_perm) eta_add = abs(np.tile(d_perm, n) - np.repeat(d_perm, n)) if circular: eta_add = np.minimum(eta_add, n - eta_add) eta_add = np.maximum(dh, eta_add) if avg_scaling: eta_add = eta_add * 1./np.sqrt((1 + dim)) eta_mat = eta_mat + eta_add # eta_mat = abs(np.tile(p_inv, n) - np.repeat(p_inv, n)) # if circular: # # pass # eta_mat = np.minimum(eta_mat, n - eta_mat) eta_mat = np.reshape(eta_mat, (n, n)) # eta_mat = np.maximum(dh, eta_mat) if do_plot: title = "it %d, score: %1.5e" % (it, new_score) plot_mat(X, permut=new_perm, title=title) if return_score: return(best_perm, best_score) else: 
return(best_perm)
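# ---------------------------------------------------------------------------
# Hypothetical call (not in the original module) to spectral_eta_trick3 on a
# similarity with a planted linear ordering; assumes compute_score and
# spectral_embedding from this module are in scope.
def _demo_spectral_eta_trick3():
    import numpy as np
    rng = np.random.default_rng(0)
    n = 40
    # Similarity decaying with |i - j|: the identity is the planted ordering.
    X = np.exp(-np.abs(np.subtract.outer(np.arange(n), np.arange(n))) / n)
    p = rng.permutation(n)
    X = X[np.ix_(p, p)]                 # hide the ordering
    perm, score = spectral_eta_trick3(X, n_iter=20, score_function='Huber',
                                      return_score=True, avg_dim=3)
    assert len(perm) == n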
def get_embedding(adjacency, n_components=8, eigen_solver=None,
                  random_state=None, eigen_tol=1e-15, norm_laplacian=False,
                  drop_first=True, norm_adjacency=False,
                  scale_embedding=False, verb=0, method='spectral'):
    """Dispatch to the requested embedding back-end ('cMDS', 'MDS', 'NMDS',
    'TSNE', or spectral embedding by default). The drop_first argument is
    overridden: the first component is dropped only for the spectral method,
    where it is the trivial constant eigenvector.
    """
    drop_first = False
    if method == 'cMDS':
        embedding = classical_MDS_embedding(adjacency,
                                            n_components=n_components,
                                            eigen_solver=eigen_solver,
                                            random_state=random_state,
                                            eigen_tol=eigen_tol,
                                            norm_adjacency=norm_adjacency,
                                            norm_laplacian=norm_laplacian,
                                            drop_first=drop_first,
                                            scale_embedding=scale_embedding,
                                            verb=verb)
    elif method == 'MDS':
        embedding = metric_MDS_embedding(adjacency, n_components=n_components,
                                         eigen_solver=eigen_solver,
                                         random_state=random_state,
                                         eigen_tol=eigen_tol,
                                         norm_adjacency=norm_adjacency,
                                         norm_laplacian=norm_laplacian,
                                         drop_first=drop_first,
                                         scale_embedding=scale_embedding,
                                         verb=verb, metric=True)
    elif method == 'NMDS':
        embedding = metric_MDS_embedding(adjacency, n_components=n_components,
                                         eigen_solver=eigen_solver,
                                         random_state=random_state,
                                         eigen_tol=eigen_tol,
                                         norm_adjacency=norm_adjacency,
                                         norm_laplacian=norm_laplacian,
                                         drop_first=drop_first,
                                         scale_embedding=scale_embedding,
                                         verb=verb, metric=False)
    elif method == 'TSNE':
        embedding = tSNE_embedding(adjacency, n_components=n_components,
                                   eigen_solver=eigen_solver,
                                   random_state=random_state,
                                   eigen_tol=eigen_tol,
                                   norm_adjacency=norm_adjacency,
                                   norm_laplacian=norm_laplacian,
                                   drop_first=drop_first,
                                   scale_embedding=scale_embedding,
                                   verb=verb)
    else:
        drop_first = True
        embedding = spectral_embedding(adjacency, n_components=n_components,
                                       eigen_solver=eigen_solver,
                                       random_state=random_state,
                                       eigen_tol=eigen_tol,
                                       norm_adjacency=norm_adjacency,
                                       norm_laplacian=norm_laplacian,
                                       drop_first=drop_first,
                                       scale_embedding=scale_embedding,
                                       verb=verb)

    return embedding
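# ---------------------------------------------------------------------------
# Usage sketch (not in the original module): the same call site can switch
# embedding back-ends via `method`. The non-spectral branches assume the
# corresponding helpers (classical_MDS_embedding, tSNE_embedding, ...) are
# defined in this module and importable.
def _demo_get_embedding():
    import numpy as np
    n = 30
    W = np.exp(-np.abs(np.subtract.outer(np.arange(n), np.arange(n))) / 10.)
    for method in ('spectral', 'cMDS', 'TSNE'):
        emb = get_embedding(W, n_components=3, method=method)
        print(method, emb.shape)        # (n, 3) for each back-end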